Skip to content

Commit

Permalink
One more optimization in the decode range function, added code to tes…
Browse files Browse the repository at this point in the history
…t with over 4000 test cases, additional comments
  • Loading branch information
cjriley9 committed Oct 3, 2021
1 parent 1a5147e commit 5e8faf7
Show file tree
Hide file tree
Showing 4 changed files with 4,219 additions and 84 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,6 @@ geo-types = ">=0.6.0, <0.8.0"
libm = "0.2.1"

[dev-dependencies]
csv = "1.1"
num-traits = "0.2"
serde = {version = "1", features = ["derive"]}
151 changes: 75 additions & 76 deletions src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,56 +2,30 @@ use crate::neighbors::Direction;
use crate::{Coordinate, GeohashError, Neighbors, Rect};
use libm::ldexp;


// 2^32 (4294967296) as an f64.
const EXP_232: f64 = 4294967296.0;
// const MASK: u64 = 0x1f<<59;

// The alphabet for the base32 encoding used in geohashing: the digits '0'-'9'
// followed by the lowercase letters with 'a', 'i', 'l' and 'o' left out
// (32 symbols in total). A 5-bit code is used as the index into this table.
static BASE32_CODES: &[char] = &[
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k',
'm', 'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
];

// Reverse lookup table for the base32 alphabet. It appears to be indexed by the
// byte value of a hash character: the entries at the ASCII positions of '0'-'9'
// hold 0-9, the entries at the lowercase geohash letters ('b'..'z' without
// 'i', 'l', 'o') hold 10-31, and 255 fills positions that have no code.
// NOTE(review): the same 0-31 run also shows up at other offsets (for example
// the first ten entries are 0-9) -- presumably this matches how the decoder
// indexes the table, which is not visible here; confirm.
static DECODER: &[u8] = &[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15,
16, 255, 17, 18, 255, 19, 20, 255, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 10,
11, 12, 13, 14, 15, 16, 255, 17, 18, 255, 19, 20, 255, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255,
];



/// Encode a coordinate to a geohash with length `len`.
///
/// ### Examples
///
/// Encoding a coordinate to a length five geohash:
///
/// ```rust
/// let coord = geohash::Coordinate { x: -120.6623, y: 35.3003 };
///
/// let geohash_string = geohash::encode(coord, 5).expect("Invalid coordinate");
///
/// assert_eq!(geohash_string, "9q60y");
/// ```
///
/// Encoding a coordinate to a length ten geohash:
///
/// ```rust
/// let coord = geohash::Coordinate { x: -120.6623, y: 35.3003 };
///
/// let geohash_string = geohash::encode(coord, 10).expect("Invalid coordinate");
///
/// assert_eq!(geohash_string, "9q60y60rhs");
/// ```
pub fn old_encode(c: Coordinate<f64>, len: usize) -> Result<String, GeohashError> {
let mut out = String::with_capacity(len);

Expand Down Expand Up @@ -97,6 +71,8 @@ pub fn old_encode(c: Coordinate<f64>, len: usize) -> Result<String, GeohashError
Ok(out)
}

// bit shifting functions used in encoding and decoding

fn spread(x: u32) -> u64 {
let mut new_x = x as u64;
new_x = (new_x | (new_x << 16)) & 0x0000ffff0000ffff;
Expand All @@ -112,6 +88,43 @@ fn interleave(x: u32, y: u32) -> u64 {
spread(x) | (spread(y) << 1)
}

/// Gathers the even-numbered bits (0, 2, 4, ...) of `x` into one contiguous
/// 32-bit value; the odd-numbered bits are discarded.
fn squash(x: u64) -> u32 {
    // Each step folds neighbouring groups together: shift right by `shift`,
    // merge, then keep only the lanes selected by `mask`.
    const STEPS: [(u32, u64); 5] = [
        (1, 0x3333333333333333),
        (2, 0x0f0f0f0f0f0f0f0f),
        (4, 0x00ff00ff00ff00ff),
        (8, 0x0000ffff0000ffff),
        (16, 0x00000000ffffffff),
    ];

    // start from the even bits only
    let even_bits = x & 0x5555555555555555;
    let packed = STEPS
        .iter()
        .fold(even_bits, |acc, &(shift, mask)| (acc | (acc >> shift)) & mask);
    packed as u32
}

/// Splits an interleaved 64-bit value into two 32-bit values: the first is
/// built from the even-numbered bits of `x`, the second from the odd-numbered bits.
fn deinterleave(x: u64) -> (u32, u32) {
    let from_even_bits = squash(x);
    // shifting by one moves the odd bits into the even positions
    let from_odd_bits = squash(x >> 1);
    (from_even_bits, from_odd_bits)
}

/// Encode a coordinate to a geohash with length `len`.
///
/// ### Examples
///
/// Encoding a coordinate to a length five geohash:
///
/// ```rust
/// let coord = geohash::Coordinate { x: -120.6623, y: 35.3003 };
///
/// let geohash_string = geohash::encode(coord, 5).expect("Invalid coordinate");
///
/// assert_eq!(geohash_string, "9q60y");
/// ```
///
/// Encoding a coordinate to a length ten geohash:
///
/// ```rust
/// let coord = geohash::Coordinate { x: -120.6623, y: 35.3003 };
///
/// let geohash_string = geohash::encode(coord, 10).expect("Invalid coordinate");
///
/// assert_eq!(geohash_string, "9q60y60rhs");
/// ```
pub fn encode(c: Coordinate<f64>, len: usize) -> Result<String, GeohashError> {
let max_lat = 90f64;
let min_lat = -90f64;
Expand All @@ -126,7 +139,10 @@ pub fn encode(c: Coordinate<f64>, len: usize) -> Result<String, GeohashError> {
return Err(GeohashError::InvalidLength(len));
}

// divides the latitude by 180 (giving -0.5 to 0.5), then adds 1.5 to give a value between 1 and 2
// then we take the first 32 bits of the significand as a u32
let lat32 = ((c.y * 0.005555555555555556 + 1.5).to_bits() >> 20) as u32;
// same as latitude, but a division by 360 instead of 180
let lon32 = ((c.x * 0.002777777777777778 + 1.5).to_bits() >> 20) as u32;

let mut interleaved_int = interleave(lat32, lon32);
Expand All @@ -139,34 +155,16 @@ pub fn encode(c: Coordinate<f64>, len: usize) -> Result<String, GeohashError> {

// }
let mut out = String::with_capacity(len);
// loop through and take the first 5 bits of the interleaved value each iteration
for _ in 0..len {
// println!("{:#b}, {:#b}", interleaved_int, interleaved_int&0x1f);
// let code = (interleaved_int&(MASK))>>59;
let code = (interleaved_int>>59) as usize&(0x1f);
// println!("{:#b}, {:#b}, {:#b}", interleaved_int, interleaved_int&0x1f, code);
// shifts so that the high 5 bits are now the low five bits, then masks to get their value
let code = (interleaved_int >> 59) as usize & (0x1f);
// uses that value to index into the array of base32 codes
out.push(BASE32_CODES[(code) as usize]);
// shifts the interleaved bits left by 5, so we get the next 5 bits on the next iteration
interleaved_int <<= 5;
}
Ok(out)

// let mut encoded = BASE32_GEOHASH.encode(&interleaved_int.to_be_bytes());
// encoded.truncate(len);

// Ok(encoded)
}

/// Gathers the even-numbered bits (0, 2, 4, ...) of `x` into one contiguous
/// 32-bit value; the odd-numbered bits are discarded.
fn squash(x: u64) -> u32 {
    // Each step folds neighbouring groups together: shift right by `shift`,
    // merge, then keep only the lanes selected by `mask`.
    const STEPS: [(u32, u64); 5] = [
        (1, 0x3333333333333333),
        (2, 0x0f0f0f0f0f0f0f0f),
        (4, 0x00ff00ff00ff00ff),
        (8, 0x0000ffff0000ffff),
        (16, 0x00000000ffffffff),
    ];

    // start from the even bits only
    let even_bits = x & 0x5555555555555555;
    let packed = STEPS
        .iter()
        .fold(even_bits, |acc, &(shift, mask)| (acc | (acc >> shift)) & mask);
    packed as u32
}

/// Splits an interleaved 64-bit value into two 32-bit values: the first is
/// built from the even-numbered bits of `x`, the second from the odd-numbered bits.
fn deinterleave(x: u64) -> (u32, u32) {
    let from_even_bits = squash(x);
    // shifting by one moves the odd bits into the even positions
    let from_odd_bits = squash(x >> 1);
    (from_even_bits, from_odd_bits)
}

/// Decode geohash string into latitude, longitude
Expand Down Expand Up @@ -203,15 +201,16 @@ pub fn decode_bbox(hash_str: &str) -> Result<Rect<f64>, GeohashError> {
}

/// Maps a 32-bit fixed-point fraction `x` onto the interval `[-r, r)`.
///
/// `x` is read as the fraction `x / 2^32`; the result is `2 * r * (x / 2^32) - r`,
/// so `0` maps to `-r` and `1 << 31` maps to `0.0`.
fn decode_range(x: u32, r: f64) -> f64 {
    // Build an f64 in the range 1 to 2 directly from its bit pattern: the biased
    // exponent 1023 (i.e. 2^0) goes in the exponent field and the 32 bits of `x`
    // fill the top of the 52-bit significand, giving 1 + x / 2^32 without an
    // integer-to-float conversion or a division.
    let p = f64::from_bits(((x as u64) << 20) | (1023 << 52));
    // drop the implicit leading 1, then scale the fraction from [0, 1) to [-r, r)
    2.0 * r * (p - 1.0) - r
}

fn error_with_precision(bits: u32) -> (f64, f64) {
let lat_bits = bits / 2;
let long_bits = bits - lat_bits;
// let lat_err = 180.0 * f64::exp2(-(lat_bits as f64));
// let long_err = 360.0 * f64::exp2(-(long_bits as f64));

// the ldexp(x, n) function is equivalent to x * 2^n
let lat_err = ldexp(180.0, -(lat_bits as i32));
let long_err = ldexp(360.0, -(long_bits as i32));
(lat_err, long_err)
Expand Down Expand Up @@ -296,6 +295,20 @@ fn hash_value_of_char(c: char) -> Result<usize, GeohashError> {
Err(GeohashError::InvalidHashCharacter(c))
}

/// Decodes a geohash via `old_decode_bbox` and returns the centre of the
/// resulting bounding box together with the half-width and half-height of
/// that box: `(<coordinate>, <x half-extent>, <y half-extent>)`.
///
/// Any error from `old_decode_bbox` is returned unchanged.
pub fn old_decode(hash_str: &str) -> Result<(Coordinate<f64>, f64, f64), GeohashError> {
    let bbox = old_decode_bbox(hash_str)?;
    let (lo, hi) = (bbox.min(), bbox.max());

    // midpoint of the box
    let center = Coordinate {
        x: (lo.x + hi.x) / 2f64,
        y: (lo.y + hi.y) / 2f64,
    };
    // half the box size along each axis
    let x_half_extent = (hi.x - lo.x) / 2f64;
    let y_half_extent = (hi.y - lo.y) / 2f64;

    Ok((center, x_half_extent, y_half_extent))
}

/// Decode a geohash into a coordinate with some longitude/latitude error. The
/// return value is `(<coordinate>, <longitude error>, <latitude error>)`.
///
Expand Down Expand Up @@ -340,20 +353,6 @@ fn hash_value_of_char(c: char) -> Result<usize, GeohashError> {
/// ),
/// );
/// ```
pub fn old_decode(hash_str: &str) -> Result<(Coordinate<f64>, f64, f64), GeohashError> {
    // bounding box of the hash; errors from the bbox decoder propagate as-is
    let bbox = old_decode_bbox(hash_str)?;
    let (lo, hi) = (bbox.min(), bbox.max());

    // midpoint of the box
    let center = Coordinate {
        x: (lo.x + hi.x) / 2f64,
        y: (lo.y + hi.y) / 2f64,
    };
    // half the box size along each axis
    let x_half_extent = (hi.x - lo.x) / 2f64;
    let y_half_extent = (hi.y - lo.y) / 2f64;

    Ok((center, x_half_extent, y_half_extent))
}

pub fn decode(hash_str: &str) -> Result<(Coordinate<f64>, f64, f64), GeohashError> {
let rect = decode_bbox(hash_str)?;
let c0 = rect.min();
Expand Down Expand Up @@ -436,4 +435,4 @@ pub fn old_neighbors(hash_str: &str) -> Result<Neighbors, GeohashError> {
n: old_neighbor(hash_str, Direction::N)?,
ne: old_neighbor(hash_str, Direction::NE)?,
})
}
}
53 changes: 45 additions & 8 deletions tests/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,52 @@ extern crate geohash;

use geohash::{decode, encode, neighbors, Coordinate};

use csv;
use serde::Deserialize;

/// One record of the CSV test-case file, deserialized with serde.
#[derive(Debug, Deserialize)]
struct TestCase {
// expected geohash string for the coordinate below
string_hash: String,
// latitude; used as the `y` of the coordinate under test
lat: f64,
// longitude; used as the `x` of the coordinate under test
long: f64,
}

#[test]
fn test_encode() {
    // spot check against a known length-9 hash
    let c0 = Coordinate {
        x: 112.5584f64,
        y: 37.8324f64,
    };
    assert_eq!(encode(c0, 9usize).unwrap(), "ww8p1r4t8".to_string());

    // use the testcases file to check encoding correctness
    let mut rdr =
        csv::Reader::from_path("tests/testcases.csv").expect("Failed to open file of test cases");
    // iterate the records directly instead of driving the iterator by hand
    for result in rdr.deserialize() {
        let record: TestCase = result.expect("Unable to deserialize record");
        let c = Coordinate {
            x: record.long,
            y: record.lat,
        };
        assert_eq!(encode(c, 12).unwrap(), record.string_hash);
    }

    // check that errors are returned appropriately

    let c1 = Coordinate {
        x: 117f64,
        y: 32f64,
    };
    // a valid coordinate at a short length still encodes
    assert_eq!(encode(c1, 3usize).unwrap(), "wte".to_string());
    // should return an error because the length is greater than 12
    assert!(encode(c1, 13).is_err());

    // should return an error because the longitude is out of range
    let c2 = Coordinate {
        x: 190f64,
        y: -80f64,
    };
    assert!(encode(c2, 3usize).is_err());

    // should return an error because the latitude is out of range
    let c3 = Coordinate {
        x: 100f64,
        y: -100f64,
    };
    assert!(encode(c3, 3usize).is_err());
}

fn compare_within(a: f64, b: f64, diff: f64) {
Expand All @@ -55,7 +75,24 @@ fn test_decode() {
compare_decode("ww8p1r4t8", 112.558386, 37.832386, 0.000021457, 0.000021457);
compare_decode("9g3q", -99.31640625, 19.423828125, 0.17578125, 0.087890625);

// let diff = 1e-5f64;
// let mut rdr =
// csv::Reader::from_path("tests/testcases.csv").expect("Failed to open file of test cases");
// let mut iter = rdr.deserialize();
// while let Some(result) = iter.next() {
// let record: TestCase = result.expect("Unable to deserialize record");
// let c = decode(&record.string_hash).unwrap();
// compare_within(c.0.x, record.long, diff);
// compare_within(c.0.y, record.lat, diff);
// }

// check for errors being thrown appropriately

// should throw an error since a is not a valid character
assert!(decode("abcd").is_err());

// should throw an error since the input is too long
assert!(decode("ww8p1r4t8ww8p1r4t8").is_err());
}

#[test]
Expand Down
Loading

0 comments on commit 5e8faf7

Please sign in to comment.