Skip to content

Commit

Permalink
LazyIndexMap for JSON objects (#522)
Browse files Browse the repository at this point in the history
* adding LazyIndexMap for JSON objects

* build map in one step

* tests and reverse input order
  • Loading branch information
samuelcolvin authored Apr 12, 2023
1 parent 5c90851 commit d786d1b
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 11 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ strum_macros = "0.24.3"
serde_json = {version = "1.0.87", features = ["preserve_order"]}
enum_dispatch = "0.3.8"
serde = "1.0.147"
indexmap = "1.9.1"
mimalloc = { version = "0.1.30", default-features = false, optional = true }
speedate = "0.7.0"
ahash = "0.8.0"
Expand Down
9 changes: 5 additions & 4 deletions src/input/parse_json.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use std::fmt;

use indexmap::IndexMap;
use pyo3::prelude::*;
use pyo3::types::{PyDict, PyList, PySet};
use serde::de::{Deserialize, DeserializeSeed, Error as SerdeError, MapAccess, SeqAccess, Visitor};

use crate::lazy_index_map::LazyIndexMap;

use crate::build_tools::py_err;

#[derive(Copy, Clone, Debug)]
Expand Down Expand Up @@ -58,7 +59,7 @@ pub enum JsonInput {
Object(JsonObject),
}
pub type JsonArray = Vec<JsonInput>;
pub type JsonObject = IndexMap<String, JsonInput>;
pub type JsonObject = LazyIndexMap<String, JsonInput>;

impl ToPyObject for JsonInput {
fn to_object(&self, py: Python<'_>) -> PyObject {
Expand Down Expand Up @@ -159,15 +160,15 @@ impl<'de> Deserialize<'de> for JsonInput {
{
match visitor.next_key_seed(KeyDeserializer)? {
Some(first_key) => {
let mut values = IndexMap::new();
let mut values = LazyIndexMap::new();

values.insert(first_key, visitor.next_value()?);
while let Some((key, value)) = visitor.next_entry()? {
values.insert(key, value);
}
Ok(JsonInput::Object(values))
}
None => Ok(JsonInput::Object(IndexMap::new())),
None => Ok(JsonInput::Object(LazyIndexMap::new())),
}
}
}
Expand Down
7 changes: 3 additions & 4 deletions src/input/return_enums.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::borrow::Cow;
use std::slice::Iter as SliceIter;

use pyo3::intern;
use pyo3::prelude::*;
Expand All @@ -10,8 +11,6 @@ use pyo3::types::PyFunction;
#[cfg(not(PyPy))]
use pyo3::PyTypeInfo;

use indexmap::map::Iter;

use crate::errors::{py_err_string, ErrorType, InputValue, ValError, ValLineError, ValResult};
use crate::recursion_guard::RecursionGuard;
use crate::validators::{CombinedValidator, Extra, Validator};
Expand Down Expand Up @@ -403,7 +402,7 @@ impl<'py> Iterator for AttributesGenericIterator<'py> {
}

pub struct JsonObjectGenericIterator<'py> {
object_iter: Iter<'py, String, JsonInput>,
object_iter: SliceIter<'py, (String, JsonInput)>,
}

impl<'py> JsonObjectGenericIterator<'py> {
Expand All @@ -418,7 +417,7 @@ impl<'py> Iterator for JsonObjectGenericIterator<'py> {
type Item = ValResult<'py, (&'py String, &'py JsonInput)>;

fn next(&mut self) -> Option<Self::Item> {
self.object_iter.next().map(Ok)
self.object_iter.next().map(|(key, value)| Ok((key, value)))
}
// size_hint is omitted as it isn't needed
}
Expand Down
67 changes: 67 additions & 0 deletions src/lazy_index_map.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
use std::borrow::Borrow;
use std::cell::RefCell;
use std::cmp::{Eq, PartialEq};
use std::fmt::Debug;
use std::hash::Hash;
use std::slice::Iter as SliceIter;

use ahash::AHashMap;

/// Like [IndexMap](https://docs.rs/indexmap/latest/indexmap/) but only builds the lookup map when it's needed.
///
/// Entries are stored in insertion order in a `Vec`; duplicate keys are kept as
/// separate entries. The key -> index lookup table is built on the first call
/// to [`LazyIndexMap::get`] and cached behind a `RefCell` so that `get` can
/// take `&self`.
#[derive(Debug, Clone, Default)]
pub struct LazyIndexMap<K, V> {
    /// All entries in insertion order, duplicates included.
    vec: Vec<(K, V)>,
    /// Lazily built lookup from key to the index of its *last* occurrence in
    /// `vec`; `None` until the first lookup, and reset by `insert`.
    map: RefCell<Option<AHashMap<K, usize>>>,
}

impl<K, V> LazyIndexMap<K, V>
where
    K: Clone + Debug + Eq + Hash,
    V: Clone + Debug,
{
    /// Creates an empty map; the lookup table is not allocated yet.
    pub fn new() -> Self {
        Self {
            vec: Vec::new(),
            map: RefCell::new(None),
        }
    }

    /// Appends an entry, keeping any earlier entry with the same key.
    ///
    /// A previously built lookup table is discarded so that later calls to
    /// `get` see the new entry instead of stale indices.
    pub fn insert(&mut self, key: K, value: V) {
        // `&mut self` guarantees no outstanding borrows, so no runtime check is needed.
        *self.map.get_mut() = None;
        self.vec.push((key, value));
    }

    /// Number of stored entries, counting duplicate keys individually.
    pub fn len(&self) -> usize {
        self.vec.len()
    }

    /// Returns the value for `key`, or `None` if the key is absent.
    ///
    /// When a key was inserted more than once the last inserted value is
    /// returned, on the first call and on every later call alike. The first
    /// call builds the lookup table (cloning every key); subsequent calls
    /// reuse it.
    ///
    /// The `PartialEq<Q>` bound is not needed by the implementation any more
    /// but is kept so the signature stays unchanged for existing callers.
    pub fn get<Q: ?Sized>(&self, key: &Q) -> Option<&V>
    where
        K: Borrow<Q> + PartialEq<Q>,
        Q: Hash + Eq,
    {
        let mut cache = self.map.borrow_mut();
        let index_of = cache.get_or_insert_with(|| {
            let mut lookup = AHashMap::with_capacity(self.vec.len());
            // Forward iteration: a later duplicate overwrites the earlier
            // index, so the cached index always points at the last value.
            for (index, (k, _)) in self.vec.iter().enumerate() {
                lookup.insert(k.clone(), index);
            }
            lookup
        });
        // Copy the index out so the returned reference borrows from `vec`,
        // not from the `RefCell` guard.
        let index = *index_of.get(key)?;
        self.vec.get(index).map(|(_, value)| value)
    }

    /// Iterates over the keys in insertion order, duplicates included.
    pub fn keys(&self) -> impl Iterator<Item = &K> {
        self.vec.iter().map(|(k, _)| k)
    }

    /// Iterates over the `(key, value)` entries in insertion order, duplicates included.
    pub fn iter(&self) -> SliceIter<'_, (K, V)> {
        self.vec.iter()
    }
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mod build_context;
mod build_tools;
mod errors;
mod input;
mod lazy_index_map;
mod lookup_key;
mod questions;
mod recursion_guard;
Expand Down
11 changes: 10 additions & 1 deletion tests/test_json.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import re

import pytest
Expand Down Expand Up @@ -100,7 +101,7 @@ def test_float(input_value, expected):
assert v.validate_json(input_value) == expected


def test_model():
def test_typed_dict():
v = SchemaValidator(
{
'type': 'typed-dict',
Expand All @@ -114,6 +115,10 @@ def test_model():
# language=json
input_str = '{"field_a": "abc", "field_b": 1}'
assert v.validate_json(input_str) == {'field_a': 'abc', 'field_b': 1}
# language=json
input_str = '{"field_a": "a", "field_a": "b", "field_b": 1}'
assert v.validate_json(input_str) == {'field_a': 'b', 'field_b': 1}
assert v.validate_json(input_str) == {'field_a': 'b', 'field_b': 1}


def test_float_no_remainder():
Expand Down Expand Up @@ -152,6 +157,10 @@ def test_dict():
v = SchemaValidator({'type': 'dict', 'keys_schema': {'type': 'int'}, 'values_schema': {'type': 'int'}})
assert v.validate_json('{"1": 2, "3": 4}') == {1: 2, 3: 4}

# duplicate keys, the last value wins, like with python
assert json.loads('{"1": 1, "1": 2}') == {'1': 2}
assert v.validate_json('{"1": 1, "1": 2}') == {1: 2}


def test_dict_any_value():
v = SchemaValidator({'type': 'dict', 'keys_schema': {'type': 'str'}})
Expand Down

0 comments on commit d786d1b

Please sign in to comment.