This repository has been archived by the owner on Aug 12, 2020. It is now read-only.

feat: Add reader to read files or part of files as streams #202

Merged · 1 commit · Mar 20, 2018

96 changes: 94 additions & 2 deletions README.md
@@ -167,7 +167,7 @@ const Exporter = require('ipfs-unixfs-engine').Exporter

### new Exporter(<cid or ipfsPath>, <dag or ipld-resolver>)

Uses the given [dag API or an ipld-resolver instance][] to fetch an IPFS [UnixFS][] object(s) by their multiaddress.
Uses the given [dag API] or an [ipld-resolver instance][] to fetch IPFS [UnixFS][] objects by their multiaddress.

Creates a new readable stream in object mode that outputs objects of the form

@@ -181,9 +181,101 @@ Creates a new readable stream in object mode that outputs objects of the form
Errors are received as with any normal stream: listen for the `'error'` event to be emitted.
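
A minimal sketch of this, based on the description above and assuming `cid` and `ipldResolver` are already in scope, might look like:

```js
const Exporter = require('ipfs-unixfs-engine').Exporter

const exporter = new Exporter(cid, ipldResolver)

exporter.on('error', (error) => {
  // handle the error
})

exporter.on('data', (entry) => {
  // entry is an object of the form documented above
})
```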


[IPLD Resolver]: https://github.com/ipld/js-ipld-resolver
[dag API]: https://github.com/ipfs/interface-ipfs-core/blob/master/SPEC/DAG.md
[ipld-resolver instance]: https://github.com/ipld/js-ipld-resolver
[UnixFS]: https://github.com/ipfs/specs/tree/master/unixfs

## Reader

The `reader` allows you to receive part or all of a file as a [pull-stream].

#### Reader example

```js
const reader = require('ipfs-unixfs-engine').reader
const pull = require('pull-stream')
const collect = require('pull-stream/sinks/collect')

pull(
  reader(cid, ipldResolver),
  collect((error, chunks) => {
    // do something with the file chunks and/or handle errors
  })
)
```

#### Reader API

```js
const reader = require('ipfs-unixfs-engine').reader
```

### reader(<cid or ipfsPath>, <dag or ipld-resolver>, <begin>, <end>)

Uses the given [dag API][] or an [ipld-resolver instance][] to fetch an IPFS [UnixFS][] object by its multiaddress.

Creates a new [pull-stream][] that sends the requested chunks of data as a series of [Buffer][] objects.

```js
const reader = require('ipfs-unixfs-engine').reader
const pull = require('pull-stream')
const drain = require('pull-stream/sinks/drain')

pull(
  reader(cid, ipldResolver),
  drain((chunk) => {
    // do something with the file chunk
  })
)
```

#### `begin` and `end`

The `begin` and `end` arguments can optionally be passed to the reader function. They follow the same semantics as the JavaScript [`Array.slice(begin, end)`][] method.

That is, `begin` is the byte index in the stream at which to start sending data and `end` is the index *before* which to stop sending data.

A negative `begin` or `end` is interpreted as an offset from the end of the stream - it is added to the total stream length, so for a 100-byte file `begin = -10` is equivalent to `begin = 90`.

See [the tests](test/reader.js) for examples of using these arguments.

```js
const reader = require('ipfs-unixfs-engine').reader
const pull = require('pull-stream')
const drain = require('pull-stream/sinks/drain')

pull(
  reader(cid, ipldResolver, 0, 10),
  drain((chunk) => {
    // chunk is a Buffer containing only the first 10 bytes of the stream
  })
)
```
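
A hypothetical sketch of the negative-offset behaviour described above, assuming `cid` refers to a file at least 10 bytes long:

```js
const reader = require('ipfs-unixfs-engine').reader
const pull = require('pull-stream')
const collect = require('pull-stream/sinks/collect')

pull(
  // begin = -10 is treated as fileSize - 10; end defaults to the file size
  reader(cid, ipldResolver, -10),
  collect((error, chunks) => {
    // Buffer.concat(chunks) contains the last 10 bytes of the file
  })
)
```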

#### Errors

Errors are received by [pull-stream][] sinks.

```js
const reader = require('ipfs-unixfs-engine').reader
const pull = require('pull-stream')
const collect = require('pull-stream/sinks/collect')

pull(
  reader(cid, ipldResolver, 0, 10),
  collect((error, chunks) => {
    // handle the error
  })
)
```

[pull-stream]: https://www.npmjs.com/package/pull-stream
[Buffer]: https://www.npmjs.com/package/buffer
[dag API]: https://github.com/ipfs/interface-ipfs-core/blob/master/SPEC/DAG.md
[ipld-resolver instance]: https://github.com/ipld/js-ipld-resolver
[UnixFS]: https://github.com/ipfs/specs/tree/master/unixfs
[`Array.slice(begin, end)`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice

## Contribute

Feel free to join in. All welcome. Open an [issue](https://github.com/ipfs/js-ipfs-unixfs-engine/issues)!
1 change: 1 addition & 0 deletions package.json
@@ -66,6 +66,7 @@
"lodash": "^4.17.5",
"multihashes": "~0.4.13",
"multihashing-async": "~0.4.8",
"pull-async-values": "^1.0.3",
"pull-batch": "^1.0.0",
"pull-block": "^1.4.0",
"pull-cat": "^1.1.11",
1 change: 1 addition & 0 deletions src/index.js
@@ -2,3 +2,4 @@

exports.importer = exports.Importer = require('./importer')
exports.exporter = exports.Exporter = require('./exporter')
exports.reader = exports.Reader = require('./reader')
137 changes: 137 additions & 0 deletions src/reader/index.js
@@ -0,0 +1,137 @@
'use strict'

const CID = require('cids')
const pull = require('pull-stream')
const asyncValues = require('pull-async-values')
const asyncMap = require('pull-stream/throughs/async-map')
const map = require('pull-stream/throughs/map')
const UnixFS = require('ipfs-unixfs')
const toB58String = require('multihashes').toB58String
const waterfall = require('async/waterfall')

module.exports = (path, ipldResolver, begin = 0, end) => {
  let streamPosition = 0

  return pull(
    asyncValues((cb) => {
> Contributor: Interesting, didn't know about pull-async-values. Couldn't this be done instead with something like a `pull.values([path])` and then using a chain of `pull.map` and `pull.asyncMap`? I'm asking because this way we wouldn't need to add an extra dependency.
>
> Collaborator (author): I couldn't find an easy way to asynchronously supply an array of values to pull-stream when you don't know the length of the values ahead of time using only the built-in functions. The FAQ recommends using external modules for 1:many transforms and async steps, so I'm not sure it's possible without adding an extra dep. That said, the source for pull-async-values is pretty small - I can pull it in to this repo if required.
>
> Contributor: @achingbrain Thanks for explaining, it's fine as it is by me :)

      waterfall([
        (next) => toCid(path, next),
        (cid, next) => ipldResolver.get(cid, next),
        (node, next) => {
          const meta = UnixFS.unmarshal(node.value.data)

          if (meta.type !== 'file') {
            return next(new Error(`Path ${path} was not a file (was ${meta.type}), can only read files`))
          }

          const fileSize = meta.fileSize()

          if (!end || end > fileSize) {
            end = fileSize
          }

          if (begin < 0) {
            begin = fileSize + begin
          }

          if (end < 0) {
            end = fileSize + end
          }

          const links = node.value.links

          if (!links || !links.length) {
            if (meta.data && meta.data.length) {
              // file was small enough to fit in one DAGNode so has no links
              return next(null, [(done) => done(null, meta.data)])
            }

            return next(new Error(`Path ${path} had no links or data`))
          }

          const linkedDataSize = links.reduce((acc, curr) => acc + curr.size, 0)
          const overhead = (linkedDataSize - meta.fileSize()) / links.length

          // create an array of functions to fetch link data
          next(null, links.map((link) => (done) => {
            // DAGNode Links report unixfs object data sizes $overhead bytes (typically 14)
            // larger than they actually are due to the protobuf wrapper
            const bytesInLinkedObjectData = link.size - overhead

            if (begin > (streamPosition + bytesInLinkedObjectData)) {
              // Start byte is after this block so skip it
              streamPosition += bytesInLinkedObjectData

              return done()
            }

            if (end < streamPosition) {
              // End byte was before this block so skip it
              streamPosition += bytesInLinkedObjectData

              return done()
            }

            // transform the multihash to a cid, the cid to a node and the node to some data
            waterfall([
              (next) => toCid(link.multihash, next),
              (cid, next) => ipldResolver.get(cid, next),
              (node, next) => next(null, node.value.data),
              (data, next) => next(null, UnixFS.unmarshal(data).data)
            ], done)
          }))
        }
      ], cb)
    }),
    asyncMap((loadLinkData, cb) => loadLinkData(cb)),
    pull.filter(Boolean),
    map((data) => {
      const block = extractDataFromBlock(data, streamPosition, begin, end)

      streamPosition += data.length

      return block
    })
  )
}

function toCid (input, callback) {
  let path = input
  let cid

  try {
    if (Buffer.isBuffer(path)) {
      path = toB58String(path)
    }

    if (path.indexOf('/ipfs/') === 0) {
      path = path.substring('/ipfs/'.length)
    }

    if (path.charAt(path.length - 1) === '/') {
      path = path.substring(0, path.length - 1)
    }

    cid = new CID(path)
  } catch (error) {
    return callback(new Error(`Path '${input}' was invalid: ${error.message}`))
  }

  callback(null, cid)
}

function extractDataFromBlock (block, streamPosition, begin, end) {
  const blockLength = block.length

  if (end - streamPosition < blockLength) {
    // If the end byte is in the current block, truncate the block to the end byte
    block = block.slice(0, end - streamPosition)
  }

  if (begin > streamPosition && begin < (streamPosition + blockLength)) {
    // If the start byte is in the current block, skip to the start byte
    block = block.slice(begin - streamPosition)
  }

  return block
}
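
For context on the review thread above, a minimal sketch (not part of this PR) of how `pull-async-values` is used as a source, based on how the reader uses it: the supplied function calls back once with an array whose length need not be known in advance, and each element is then emitted downstream.

```js
const pull = require('pull-stream')
const asyncValues = require('pull-async-values')

pull(
  // the source calls back once with an array of values
  asyncValues((cb) => {
    setTimeout(() => cb(null, ['a', 'b', 'c']), 10)
  }),
  // each element of the array arrives downstream as a separate value
  pull.collect((error, values) => {
    console.log(error, values) // null [ 'a', 'b', 'c' ]
  })
)
```
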
3 changes: 3 additions & 0 deletions test/browser.js
@@ -60,6 +60,9 @@ describe('IPFS data importing tests on the Browser', function () {
  // require('./exporter')(repo)
  // require('./exporter-subtree')(repo)

  // Reader
  require('./reader')(repo)

  // Other
  require('./import-export')(repo)
  require('./import-export-nested-dir')(repo)
3 changes: 3 additions & 0 deletions test/node.js
@@ -61,6 +61,9 @@ describe('IPFS UnixFS Engine', () => {
  require('./exporter')(repo)
  require('./exporter-subtree')(repo)

  // Reader
  require('./reader')(repo)

  // Other
  require('./import-export')(repo)
  require('./import-export-nested-dir')(repo)