Skip to content

Commit

Permalink
Add new properties error_rate and ksize
Browse files Browse the repository at this point in the history
  • Loading branch information
luizirber committed Feb 24, 2015
1 parent 21faa5a commit 60e2661
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 51 deletions.
92 changes: 78 additions & 14 deletions khmer/_khmermodule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4339,17 +4339,17 @@ static PyObject* khmer_hllcounter_new(PyTypeObject * type, PyObject * args,
self = (khmer_KHLLCounter_Object *)type->tp_alloc(type, 0);

if (self != NULL) {
double error_rate = 0;
WordLength ksize = 0;
double error_rate = 0.01;
WordLength ksize = 20;

if (!PyArg_ParseTuple(args, "db", &error_rate, &ksize)) {
if (!PyArg_ParseTuple(args, "|db", &error_rate, &ksize)) {
Py_DECREF(self);
return NULL;
}

try {
self->hllcounter = new HLLCounter(error_rate, ksize);
} catch (khmer_exception &e) {
} catch (InvalidValue &e) {
Py_DECREF(self);
PyErr_SetString(PyExc_ValueError, e.what());
return NULL;
Expand Down Expand Up @@ -4450,16 +4450,76 @@ static PyObject * hllcounter_consume_fasta(khmer_KHLLCounter_Object * me,

static
PyObject *
hllcounter_getp(khmer_KHLLCounter_Object * me)
hllcounter_get_erate(khmer_KHLLCounter_Object * me)
{
return PyLong_FromLong(me->hllcounter->get_p());
return PyFloat_FromDouble(me->hllcounter->get_erate());
}

static
PyObject *
hllcounter_getm(khmer_KHLLCounter_Object * me)
hllcounter_get_ksize(khmer_KHLLCounter_Object * me)
{
return PyLong_FromLong(me->hllcounter->get_m());
return PyLong_FromLong(me->hllcounter->get_ksize());
}

static
int
hllcounter_set_ksize(khmer_KHLLCounter_Object * me, PyObject *value,
void *closure)
{
if (value == NULL) {
PyErr_SetString(PyExc_TypeError, "Cannot delete attribute");
return -1;
}

if (!PyLong_Check(value) && !PyInt_Check(value)) {
PyErr_SetString(PyExc_TypeError,
"Please use an int value for k-mer size");
return -1;
}

WordLength ksize = PyLong_AsLong(value);
try {
me->hllcounter->set_ksize(ksize);
} catch (InvalidValue &e) {
PyErr_SetString(PyExc_ValueError, e.what());
return -1;
} catch (ReadOnlyAttribute &e) {
PyErr_SetString(PyExc_AttributeError, e.what());
return -1;
}

return 0;
}

static
int
hllcounter_set_erate(khmer_KHLLCounter_Object * me, PyObject *value,
void *closure)
{
if (value == NULL) {
PyErr_SetString(PyExc_TypeError, "Cannot delete attribute");
return -1;
}

if (!PyFloat_Check(value)) {
PyErr_SetString(PyExc_TypeError,
"Please use a float value for k-mer size");
return -1;
}

double erate = PyFloat_AsDouble(value);
try {
me->hllcounter->set_erate(erate);
} catch (InvalidValue &e) {
PyErr_SetString(PyExc_ValueError, e.what());
return -1;
} catch (ReadOnlyAttribute &e) {
PyErr_SetString(PyExc_AttributeError, e.what());
return -1;
}

return 0;
}

static
Expand Down Expand Up @@ -4516,15 +4576,19 @@ static PyGetSetDef khmer_hllcounter_getseters[] = {
NULL
},
{
"p",
(getter)hllcounter_getp, NULL,
"p for this HLL counter. The number of internal counters m is 2 ** p",
"error_rate",
(getter)hllcounter_get_erate, (setter)hllcounter_set_erate,
"Error rate for this HLL counter. "
"Can be changed prior to first counting, but becomes read-only after "
"that (raising AttributeError)",
NULL
},
{
"m",
(getter)hllcounter_getm, NULL,
"Number of internal counters for this HLL counter.",
"ksize",
(getter)hllcounter_get_ksize, (setter)hllcounter_set_ksize,
"k-mer size for this HLL counter."
"Can be changed prior to first counting, but becomes read-only after "
"that (raising AttributeError)",
NULL
},
{
Expand Down
43 changes: 40 additions & 3 deletions lib/hllcounter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@ double calc_alpha(const int p)
{
if (p < 4) {
// ceil(log2((1.04 / x) ^ 2)) = 4, solve for x
throw khmer_exception("Error rate must be smaller than 0.367696");
throw InvalidValue("Please set error rate to a value "
"smaller than 0.367696");
} else if (p > 16) {
// ceil(log2((1.04 / x) ^ 2)) = 16, solve for x
throw khmer_exception("Error rate must be greater than 0.0040624");
throw InvalidValue("Please set error rate to a value "
"greater than 0.0040624");
}

/*
Expand Down Expand Up @@ -232,7 +234,8 @@ int get_rho(HashIntoType w, int max_width)
HLLCounter::HLLCounter(double error_rate, WordLength ksize)
{
if (error_rate < 0) {
throw khmer_exception("Error rate should be a positive number");
throw InvalidValue("Please set error rate to a value "
"greater than zero");
}
int p = ceil(log2(pow(1.04 / error_rate, 2)));
this->init(p, ksize);
Expand All @@ -256,6 +259,40 @@ void HLLCounter::init(int p, WordLength ksize)
init_bias_data();
}

double HLLCounter::get_erate()
{
return 1.04 / sqrt(this->m);
}

void HLLCounter::set_erate(double error_rate)
{
if (count(this->M.begin(), this->M.end(), 0) != this->m) {
throw ReadOnlyAttribute("You can only change error rate prior to "
"first counting");
}

if (error_rate < 0) {
throw InvalidValue("Please set error rate to a value "
"greater than zero");
}
int p = ceil(log2(pow(1.04 / error_rate, 2)));
this->init(p, this->_ksize);
}

void HLLCounter::set_ksize(WordLength new_ksize)
{
if (count(this->M.begin(), this->M.end(), 0) != this->m) {
throw ReadOnlyAttribute("You can only change k-mer size prior to "
"first counting");
}

if (new_ksize <= 0) {
throw InvalidValue("Please set k-mer size to a value "
"greater than zero");
}
this->init(this->p, new_ksize);
}

double HLLCounter::_Ep()
{
double sum = accumulate(this->M.begin(), this->M.end(), 0.0, ep_sum);
Expand Down
7 changes: 7 additions & 0 deletions lib/hllcounter.hh
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,17 @@ public:
{
return m;
}
void set_ksize(WordLength new_ksize);
int get_ksize()
{
return _ksize;
}
std::vector<int> get_M()
{
return M;
}
double get_erate();
void set_erate(double new_erate);
private:
double _Ep();
double alpha;
Expand Down
25 changes: 25 additions & 0 deletions lib/khmer_exception.hh
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,31 @@ public:
StreamReadError(const char * msg) : khmer_file_exception(msg) {}
};


///
// An exception for invalid arguments to functions
//

class InvalidValue : public khmer_exception
{
public:
explicit InvalidValue(const char * msg) : khmer_exception(msg) { }
explicit InvalidValue(const std::string& msg)
: khmer_exception(msg) { }
};

///
// An exception for trying to change a read-only attributes
//

class ReadOnlyAttribute : public khmer_exception
{
public:
explicit ReadOnlyAttribute(const char * msg) : khmer_exception(msg) { }
explicit ReadOnlyAttribute(const std::string& msg)
: khmer_exception(msg) { }
};

}

#endif // KHMER_EXCEPTION_HH
Expand Down
Loading

0 comments on commit 60e2661

Please sign in to comment.