|
1 | | -# hw-json |
2 | | -[](https://circleci.com/gh/haskell-works/hw-json/tree/master) |
| 1 | +# hw-json-simple-cursor |
| 2 | +[CircleCI build status](https://circleci.com/gh/haskell-works/hw-json-simple-cursor/tree/master) |
3 | 3 |
|
4 | | -`hw-json` is a succinct JSON parsing library. |
| 4 | +`hw-json-simple-cursor` is a support library for `hw-json`, a succinct JSON parsing library. |
5 | 5 |
|
6 | 6 | It uses succinct data structures to allow traversal of large JSON strings with minimal memory overhead. |
7 | 7 |
|
8 | | -For an example, see [`app/Main.hs`](../master/app/Main.hs) |
9 | | - |
10 | | -## Prerequisites |
11 | | - |
12 | | -* `cabal` version `2.2` or later |
13 | | - |
14 | | -## Memory benchmark |
15 | | - |
16 | | -### Parsing large Json files in Scala with Argonaut |
17 | | - |
18 | | -```text |
19 | | - S0U EU OU MU CCSU CMD |
20 | | ---------- --------- ----------- -------- -------- --------------------------------------------------------------- |
21 | | - 0.0 80,526.3 76,163.6 72,338.6 13,058.6 sbt console |
22 | | - 0.0 536,660.4 76,163.6 72,338.6 13,058.6 import java.io._, argonaut._, Argonaut._ |
23 | | - 0.0 552,389.1 76,163.6 72,338.6 13,058.6 val file = new File("/Users/jky/Downloads/78mbs.json" |
24 | | - 0.0 634,066.5 76,163.6 72,338.6 13,058.6 val array = new Array[Byte](file.length.asInstanceOf[Int]) |
25 | | - 0.0 644,552.3 76,163.6 72,338.6 13,058.6 val is = new FileInputStream("/Users/jky/Downloads/78mbs.json") |
26 | | - 0.0 655,038.1 76,163.6 72,338.6 13,058.6 is.read(array) |
27 | | -294,976.0 160,159.7 1,100,365.0 79,310.8 13,748.1 val json = new String(array) |
28 | | -285,182.9 146,392.6 1,956,264.5 82,679.8 14,099.6 val data = Parse.parse(json) |
29 | | - *********** |
30 | | -``` |
31 | | - |
32 | | -### Parsing large Json files in Haskell with Aeson |
33 | | - |
34 | | -```haskell |
35 | | --- CMD -- Mem (MB) |
36 | | ----------------------------------------------------------- -- -------- |
37 | | -import Control.DeepSeq -- 94 |
38 | | -import Data.Aeson -- 100 |
39 | | -import qualified Data.ByteString.Lazy as BSL -- 104 |
40 | | -bs <- BSL.readFile "../corpus/bench/hospitalisation.json" -- 105 |
41 | | -let !x = deepseq bs bs -- 146 |
42 | | -let !y = decode json78m :: Maybe Value -- 669 |
43 | | -``` |
44 | | - |
45 | | -### Parsing large Json files in Haskell with hw-json |
46 | | - |
47 | | -```haskell |
48 | | --- CMD -- Mem (MB) |
49 | | ---------------------------------------------------------------------- -- -------- |
50 | | -import Foreign -- 93 |
51 | | -import Control.Monad -- 95 |
52 | | -import Data.Word -- 96 |
53 | | -import HaskellWorks.Data.BalancedParens.Simple -- 97 |
54 | | -import HaskellWorks.Data.Bits.BitShown -- 98 |
55 | | -import HaskellWorks.Data.FromForeignRegion -- 99 |
56 | | -import HaskellWorks.Data.Json.Backend.Standard.Cursor -- 106 |
57 | | -import System.IO.MMap -- 109 |
58 | | -import qualified Data.ByteString as BS -- 110 |
59 | | -import qualified Data.Vector.Storable as DVS -- 111 |
60 | | -import qualified HaskellWorks.Data.ByteString as BS -- 112 |
61 | | -import qualified HaskellWorks.Data.Json.Backend.Standard.Fast as FAST -- 114 |
62 | | -bs <- BS.mmap "../corpus/bench/hospitalisation.json" -- 115 |
63 | | -let !cursor = FAST.makeCursor bs -- 203 |
64 | | -``` |
65 | | - |
66 | | -## Examples |
67 | | - |
68 | | -### Navigation example |
69 | | - |
70 | | -```haskell |
71 | | -import Control.Monad |
72 | | -import Data.String |
73 | | -import Data.Word |
74 | | -import HaskellWorks.Data.BalancedParens.Simple |
75 | | -import HaskellWorks.Data.Bits.BitShow |
76 | | -import HaskellWorks.Data.Bits.BitShown |
77 | | -import HaskellWorks.Data.FromForeignRegion |
78 | | -import HaskellWorks.Data.Json.Backend.Standard.Cursor |
79 | | -import HaskellWorks.Data.Json.Internal.Token.Types |
80 | | -import HaskellWorks.Data.RankSelect.Base.Rank0 |
81 | | -import HaskellWorks.Data.RankSelect.Base.Rank1 |
82 | | -import HaskellWorks.Data.RankSelect.Base.Select1 |
83 | | -import HaskellWorks.Data.RankSelect.CsPoppy |
84 | | -import System.IO.MMap |
85 | | - |
86 | | -import qualified Data.ByteString as BS |
87 | | -import qualified Data.Vector.Storable as DVS |
88 | | -import qualified HaskellWorks.Data.Json.Backend.Standard.Cursor as C |
89 | | -import qualified HaskellWorks.Data.Json.Backend.Standard.Fast as FAST |
90 | | -import qualified HaskellWorks.Data.TreeCursor as TC |
91 | | - |
92 | | -let fc = TC.firstChild |
93 | | -let ns = TC.nextSibling |
94 | | -let pn = TC.parent |
95 | | -let ss = TC.subtreeSize |
96 | | -let cursor = FAST.makeCursor "[null, {\"field\": 1}]" |
97 | | -cursor |
98 | | -fc cursor |
99 | | -(fc >=> ns) cursor |
100 | | -``` |
101 | | - |
102 | | -### Querying example |
103 | | - |
104 | | -```haskell |
105 | | -import Control.Monad |
106 | | -import Data.Function |
107 | | -import Data.List |
108 | | -import HaskellWorks.Data.Json.Backend.Standard.Load.Cursor |
109 | | -import HaskellWorks.Data.Json.Backend.Standard.Load.Partial |
110 | | -import HaskellWorks.Data.Json.Backend.Standard.Load.Raw |
111 | | -import HaskellWorks.Data.Json.PartialValue |
112 | | -import HaskellWorks.Data.MQuery |
113 | | -import HaskellWorks.Data.MQuery.Micro |
114 | | -import HaskellWorks.Data.MQuery.Row |
115 | | - |
116 | | -import qualified Data.DList as DL |
117 | | - |
118 | | -!cursor <- loadPartial "../corpus/bench/78mb.json" |
119 | | -!cursor <- loadCursorWithIndex "../corpus/bench/78mb.json" |
120 | | -!cursor <- loadCursor "../corpus/bench/78mb.json" |
121 | | -!cursor <- loadCursorWithCsPoppyIndex "../corpus/bench/78mb.json" |
122 | | -let !json = jsonPartialJsonValueAt cursor |
123 | | -let q = MQuery (DL.singleton json) |
124 | | - |
125 | | -putPretty $ q >>= item & limit 10 |
126 | | -putPretty $ q >>= item & page 10 1 |
127 | | -putPretty $ q >>= item >>= hasKV "founded_year" (JsonPartialNumber 2005) & limit 10 |
128 | | -putPretty $ q >>= item >>= entry |
129 | | -putPretty $ q >>= item >>= entry >>= named "name" & limit 10 |
130 | | -putPretty $ q >>= (item >=> entry >=> named "acquisition" >=> entry >=> named "price_currency_code") |
131 | | -putPretty $ q >>= (item >=> entry >=> named "acquisition" >=> entry >=> named "price_currency_code") & onList (uniq . sort) |
132 | | -putPretty $ q >>= (item >=> entry >=> named "acquisition" >=> entry >=> named "price_currency_code" >=> asString >=> valueOf "USD") & limit 10 |
133 | | -putPretty $ q >>= (item >=> entry >=> named "acquisition" >=> having (entry >=> named "price_currency_code" >=> asString >=> valueOf "USD") >=> entry >=> named "price_amount") & limit 10 |
134 | | -putPretty $ q >>= (item >=> entry >=> named "acquisition" >=> having (entry >=> named "price_currency_code" >=> asString >=> valueOf "USD") >=> entry >=> named "price_amount" >=> castAsInteger ) & limit 10 |
135 | | -putPretty $ q >>= (item >=> entry >=> named "acquisition" >=> having (entry >=> named "price_currency_code" >=> asString >=> valueOf "USD") >=> entry >=> named "price_amount" >=> castAsInteger ) & aggregate sum |
136 | | - |
137 | | -putPretty $ q >>= item & limit 10 |
138 | | -putPretty $ q >>= item & page 10 1 |
139 | | -putPretty $ q >>= item >>= entry |
140 | | -putPretty $ q >>= item >>= entry >>= named "name" & limit 10 |
141 | | -putPretty $ q >>= (item >=> entry >=> named "acquisition" >=> entry >=> named "price_currency_code" >=> asString) |
142 | | -putPretty $ q >>= (item >=> entry >=> named "acquisition" >=> entry >=> named "price_currency_code" >=> asString) & onList (uniq . sort) |
143 | | -putPretty $ q >>= (item >=> entry >=> named "acquisition" >=> entry >=> named "price_currency_code" >=> asString >=> valueOf "USD") & limit 10 |
144 | | -putPretty $ q >>= (item >=> entry >=> named "acquisition" >=> having (entry >=> named "price_currency_code" >=> asString >=> valueOf "USD") >=> entry >=> named "price_amount") & limit 10 |
145 | | -putPretty $ q >>= (item >=> entry >=> named "acquisition" >=> having (entry >=> named "price_currency_code" >=> asString >=> valueOf "USD") >=> entry >=> named "price_amount" >=> castAsInteger ) & limit 10 |
146 | | -putPretty $ q >>= (item >=> entry >=> named "acquisition" >=> having (entry >=> named "price_currency_code" >=> asString >=> valueOf "USD") >=> entry >=> named "price_amount" >=> castAsInteger ) & aggregate sum |
147 | | -``` |
| 8 | +For more information, see [`hw-json`](https://github.com/haskell-works/hw-json). |
148 | 9 |
|
149 | 10 | ## References |
150 | 11 |
|
|
0 commit comments