Skip to content

Commit 59c3b75

Browse files
authored
Merge pull request wush978#131 from wush978/dev/cran-v0.9.1.4
- fix wush978#130
2 parents f2dc913 + c074f53 commit 59c3b75

File tree

9 files changed, with 47 additions and 102 deletions.

DESCRIPTION

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
Package: FeatureHashing
22
Type: Package
33
Title: Creates a Model Matrix via Feature Hashing with a Formula Interface
4-
Version: 0.9.1.3
5-
Date: 2018-04-10
4+
Version: 0.9.1.4
5+
Date: 2019-11-24
66
Authors@R: c(
77
person("Wush", "Wu", email = "wush978@gmail.com", role = c("aut", "cre")),
88
person("Michael", "Benesty", email = "michael@benesty.fr", role = c("aut", "ctb")))
@@ -23,8 +23,8 @@ Imports:
2323
Matrix,
2424
digest(>= 0.6.8),
2525
magrittr (>= 1.5)
26-
LinkingTo: Rcpp, digest(>= 0.6.8), BH
27-
Suggests: RUnit, glmnet, knitr, xgboost, rmarkdown
26+
LinkingTo: Rcpp, digest(>= 0.6.8), BH(>= 1.54.0-1)
27+
Suggests: RUnit, glmnet, knitr, xgboost, rmarkdown, pROC
2828
SystemRequirements: C++11
2929
BugReports: https://github.com/wush978/FeatureHashing/issues
3030
URL: https://github.com/wush978/FeatureHashing

src/bswap_32.cpp

Lines changed: 0 additions & 28 deletions
This file was deleted.

src/bswap_32.h

Lines changed: 0 additions & 37 deletions
This file was deleted.

src/hash_function.h

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -19,13 +19,15 @@
1919
#ifndef __HASH_FUNCTION_HPP__
2020
#define __HASH_FUNCTION_HPP__
2121

22-
#include <boost/detail/endian.hpp>
22+
#include <boost/predef/other/endian.h>
23+
#include <boost/endian/conversion.hpp>
2324
#include "digestlocal.h"
24-
#include "bswap_32.h"
2525

2626
class HashFunction {
2727

2828
public:
29+
30+
virtual ~HashFunction() { }
2931

3032
virtual uint32_t operator()(const char* buf, int size, bool is_interaction = false) = 0;
3133

@@ -34,6 +36,8 @@ class HashFunction {
3436
class NullHashFunction : public HashFunction {
3537

3638
public:
39+
40+
virtual ~NullHashFunction() { }
3741

3842
virtual uint32_t operator()(const char* buf, int size, bool is_interaction = false) {
3943
return 1;
@@ -48,6 +52,8 @@ class MurmurHash3HashFunction : public HashFunction {
4852
public :
4953

5054
MurmurHash3HashFunction(uint32_t _seed) : seed(_seed) { }
55+
56+
virtual ~MurmurHash3HashFunction() { }
5157

5258
virtual uint32_t operator()(const char* buf, int size, bool is_interaction = false) {
5359
return ::PMurHash32(seed, buf, size);
@@ -66,22 +72,26 @@ class MurmurHash3LogHashFunction : public HashFunction {
6672
: HashFunction(), seed(_seed), e(_e)
6773
{ }
6874

75+
virtual ~MurmurHash3LogHashFunction() { }
76+
6977
virtual uint32_t operator()(const char* buf, int size, bool is_interaction = false) {
7078
uint32_t retval = PMurHash32(seed, buf, size);
7179
if (is_interaction) {
7280
const uint32_t* src = reinterpret_cast<const uint32_t*>(buf);
73-
#ifdef BOOST_BIG_ENDIAN
74-
if (inverse_mapping.find(bswap_32(src[0])) == inverse_mapping.end()) throw std::logic_error("interaction is hashed before main effect!");
75-
if (inverse_mapping.find(bswap_32(src[1])) == inverse_mapping.end()) throw std::logic_error("interaction is hashed before main effect!");
76-
std::string key(inverse_mapping[bswap_32(src[0])]);
81+
#if BOOST_ENDIAN_BIG_BYTE && !BOOST_ENDIAN_LITTLE_BYTE
82+
if (inverse_mapping.find(boost::endian::endian_reverse(src[0])) == inverse_mapping.end()) throw std::logic_error("interaction is hashed before main effect!");
83+
if (inverse_mapping.find(boost::endian::endian_reverse(src[1])) == inverse_mapping.end()) throw std::logic_error("interaction is hashed before main effect!");
84+
std::string key(inverse_mapping[boost::endian::endian_reverse(src[0])]);
7785
key.append(":");
78-
key.append(inverse_mapping[bswap_32(src[1])]);
79-
#else
86+
key.append(inverse_mapping[boost::endian::endian_reverse(src[1])]);
87+
#elif !BOOST_ENDIAN_BIG_BYTE && BOOST_ENDIAN_LITTLE_BYTE
8088
if (inverse_mapping.find(src[0]) == inverse_mapping.end()) throw std::logic_error("interaction is hashed before main effect!");
8189
if (inverse_mapping.find(src[1]) == inverse_mapping.end()) throw std::logic_error("interaction is hashed before main effect!");
8290
std::string key(inverse_mapping[src[0]]);
8391
key.append(":");
8492
key.append(inverse_mapping[src[1]]);
93+
#else
94+
#error Unknown endianness
8595
#endif
8696
e[key.c_str()] = Rcpp::wrap((int) retval);
8797
inverse_mapping[retval] = key;

src/hash_internal.cpp

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@
1818

1919
#include <cstring>
2020
#include <deque>
21-
#include <boost/detail/endian.hpp>
2221
#include <boost/algorithm/string.hpp>
2322
#include <Rcpp.h>
2423
#include "hash_function.h"
@@ -52,12 +51,14 @@ IntegerVector h(CharacterVector src) {
5251

5352
MH_UINT32 interaction(MH_UINT32 a, MH_UINT32 b) {
5453
MH_UINT32 buf[2];
55-
#ifdef BOOST_BIG_ENDIAN
56-
buf[0] = bswap_32(a);
57-
buf[1] = bswap_32(b);
58-
#else
54+
#if BOOST_ENDIAN_BIG_BYTE && !BOOST_ENDIAN_LITTLE_BYTE
55+
buf[0] = boost::endian::endian_reverse(a);
56+
buf[1] = boost::endian::endian_reverse(b);
57+
#elif !BOOST_ENDIAN_BIG_BYTE && BOOST_ENDIAN_LITTLE_BYTE
5958
buf[0] = a;
6059
buf[1] = b;
60+
#else
61+
#error Unknown endianness
6162
#endif
6263
return PMurHash32(MURMURHASH3_H_SEED, reinterpret_cast<char*>(buf), sizeof(MH_UINT32) * 2);
6364
}

src/hashed_model_matrix.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -173,6 +173,9 @@ const ConvertersVec get_converters(
173173
} else {
174174
pVectorConverter q(*retval.rbegin());
175175
Param param("", _h_main, _h_binary, hash_size);
176+
#ifdef NOISY_DEBUG
177+
Rprintf("Initialize InteractionConverter\n");
178+
#endif
176179
*retval.rbegin() = pInteractionConverter(new InteractionConverter(q, p, param));
177180
}
178181
}
@@ -222,7 +225,7 @@ SEXP hashed_model_matrix(RObject tf, DataFrameLike data, unsigned long hash_size
222225
});
223226
Rprintf("\n");
224227
#endif
225-
std::for_each(i_origin.begin(), i_origin.end(), [&ivec, &xvec, &hash_size](uint32_t hashed_value) {
228+
std::for_each(i_origin.begin(), i_origin.end(), [&ivec](uint32_t hashed_value) {
226229
ivec.push_back(hashed_value);
227230
});
228231
xvec.insert(xvec.end(), x_origin.begin(), x_origin.end());
@@ -248,7 +251,7 @@ SEXP hashed_model_matrix(RObject tf, DataFrameLike data, unsigned long hash_size
248251
const std::vector<uint32_t>& i_origin(p->get_feature(i));
249252
const std::vector<double>& x_origin(p->get_value(i));
250253
auto x_value = x_origin.begin();
251-
std::for_each(i_origin.begin(), i_origin.end(), [&cache, &hash_size, &x_value, &i](uint32_t hashed_value) {
254+
std::for_each(i_origin.begin(), i_origin.end(), [&cache, &x_value, &i](uint32_t hashed_value) {
252255
std::pair< std::vector<int>, std::vector<double> >& k(cache[hashed_value]);
253256
k.first.push_back(i);
254257
k.second.push_back(*(x_value++));

src/hashed_model_matrix.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@
2121

2222
#include <cstring>
2323
#include <memory>
24-
#include <boost/detail/endian.hpp>
2524
#include <Rcpp.h>
2625
#include "hash_function.h"
2726
#include "vector_converter.h"

src/intToRaw.cpp

Lines changed: 9 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,9 @@
11
#include <Rcpp.h>
2-
#include <boost/detail/endian.hpp>
2+
#include <boost/predef/other/endian.h>
3+
#include <boost/endian/conversion.hpp>
34

4-
#ifdef linux
5-
#include <byteswap.h>
6-
#endif
7-
8-
#ifndef bswap_32
9-
#ifdef __APPLE__
10-
#include <libkern/OSByteOrder.h>
11-
#else
12-
uint32_t bswap_32(uint32_t x);
13-
#endif
5+
#ifndef BOOST_ENDIAN_BIG_BYTE
6+
#error No BOOST_ENDIAN_BIG_BYTE
147
#endif
158

169
using namespace Rcpp;
@@ -23,10 +16,12 @@ using namespace Rcpp;
2316
SEXP intToRaw(int src) {
2417
RawVector retval(4);
2518
uint32_t *p = (uint32_t*) &retval[0];
26-
#ifdef BOOST_BIG_ENDIAN
27-
p[0] = bswap_32(src);
28-
#else
19+
#if BOOST_ENDIAN_BIG_BYTE && !BOOST_ENDIAN_LITTLE_BYTE
20+
p[0] = boost::endian::endian_reverse(src);
21+
#elif !BOOST_ENDIAN_BIG_BYTE && BOOST_ENDIAN_LITTLE_BYTE
2922
p[0] = src;
23+
#else
24+
#error Unknown endianness
3025
#endif
3126
return retval;
3227
}

vignettes/FeatureHashing.Rmd

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,9 @@ library(glmnet)
9292
cv.g.lr <- cv.glmnet(m.train, ipinyou.train$IsClick,
9393
family = "binomial")#, type.measure = "auc")
9494
p.lr <- predict(cv.g.lr, m.test, s="lambda.min")
95-
auc(ipinyou.test$IsClick, p.lr)
95+
96+
library(pROC)
97+
auc(ipinyou.test$IsClick, c(p.lr))
9698
```
9799

98100
## Gradient Boosted Decision Tree with [`xgboost`](https://cran.r-project.org/package=xgboost)
@@ -103,9 +105,9 @@ Following the script above,
103105
# GBDT with xgboost
104106
if(require("xgboost")){
105107
cv.g.gdbt <- xgboost(m.train, ipinyou.train$IsClick, max.depth=7, eta=0.1, subsample = 0.7, colsample_bytree = 0.7,
106-
nround = 100, objective = "binary:logistic", verbose = ifelse(interactive(), 1, 0))
108+
nround = 10, objective = "binary:logistic", verbose = ifelse(interactive(), 1, 0))
107109
p.lm <- predict(cv.g.gdbt, m.test)
108-
glmnet::auc(ipinyou.test$IsClick, p.lm)
110+
auc(ipinyou.test$IsClick, p.lm)
109111
}
110112
111113
```

0 commit comments

Comments
 (0)