-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit f03d0ca
Showing
14 changed files
with
10,353 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Created by http://gitignore.io | ||
|
||
### Node ### | ||
lib-cov | ||
*.seed | ||
*.log | ||
*.csv | ||
*.dat | ||
*.out | ||
*.pid | ||
*.gz | ||
|
||
pids | ||
logs | ||
results | ||
|
||
npm-debug.log | ||
node_modules | ||
coverage |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
test/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
The MIT License (MIT) | ||
|
||
Copyright (c) 2013 Bala Clark | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy of | ||
this software and associated documentation files (the "Software"), to deal in | ||
the Software without restriction, including without limitation the rights to | ||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of | ||
the Software, and to permit persons to whom the Software is furnished to do so, | ||
subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | ||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | ||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | ||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
|
||
test: | ||
@./node_modules/mocha/bin/mocha --reporter spec test | ||
|
||
setup: | ||
@npm install | ||
|
||
cover: | ||
@./node_modules/istanbul/lib/cli.js cover ./node_modules/mocha/bin/_mocha test -- --ui bdd -t 5000 | ||
|
||
clean: | ||
@rm -rf coverage | ||
|
||
.PHONY: test setup cover clean |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
MHTML | ||
----- | ||
|
||
Extract MHTML files to HTML. | ||
|
||
TODO | ||
---- | ||
|
||
* make callbacks optional | ||
* return more errors where possible | ||
* test with many different mhtml files | ||
* update image srcs in css to relative when needed | ||
* save assets to a subfolder | ||
* allow mhtml files to be created from a folder | ||
* convert to PDF | ||
* fully support spec: http://www.ietf.org/rfc/rfc2557.txt (obsoletes RFC 2110, http://www.ietf.org/rfc/rfc2110.txt) | ||
|
||
Example images © | ||
---------------- | ||
|
||
* Ben Fredericson (http://www.flickr.com/photos/xjrlokix/7670504246/sizes/m/in/photostream/, http://www.flickr.com/photos/xjrlokix/7670507852/sizes/m/in/photostream/) | ||
* Major Clanger (http://www.flickr.com/photos/major_clanger/4850772/sizes/m/in/photostream/) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
'use strict'; | ||
|
||
var fs = require('fs'); | ||
var path = require('path'); | ||
var program = require('commander'); | ||
var glob = require('glob'); | ||
var mhtml = require(__dirname + '/mhtml'); | ||
|
||
// Command-line interface definition. The version string is read from the
// package manifest next to this script. `--extract` and `--output` are
// declared with a bracketed (optional) value; `--force` is a plain flag.
program
  .version(require(__dirname + '/package').version)
  .option('-e, --extract [value]', 'Extract MHTML archive / folder of MHTML archives')
  .option('-o, --output [value]', 'Output destination (defaults to same folder as the source file)')
  .option('-f, --force', 'Delete the existing output folder before extracting')
  // .option('-v, --verbose', 'Verbose mode')
  .parse(process.argv);
|
||
/**
 * Build the directory path an archive is extracted into: the archive's file
 * name with its extension removed, placed inside `base` or, when `base` is
 * falsy, inside the archive's own directory.
 *
 * @param {string} file - path of the archive
 * @param {string} [base] - directory that should contain the result
 * @returns {string} the joined directory path
 */
function getdir(file, base) {
  var stem = path.basename(file, path.extname(file));
  var parent = base ? base : path.dirname(file);
  return path.join(parent, stem);
}
|
||
// Entry point: extract a single archive, or every *.mht / *.mhtml archive
// directly inside a folder.
if (program.extract) {

  // Print a failure and make the process finish with a non-zero exit status.
  // Safe to use directly as an extraction callback: does nothing without an error.
  var reportError = function (err) {
    if (!err) return;
    console.error(err);
    process.exitCode = 1;
  };

  // Both options are declared with an optional value, so a flag given without
  // a value is not a string (commander reports it as `true`) and must not
  // reach the path helpers.
  var outputBase = (typeof program.output === 'string') ? program.output : undefined;

  if (typeof program.extract !== 'string') {

    reportError(new Error('--extract requires the path of an MHTML file or folder'));

  } else {

    var output = outputBase || getdir(program.extract);

    fs.stat(program.extract, function (err, stats) {

      // e.g. the source path does not exist; `stats` is undefined in that case
      if (err) return reportError(err);

      if (!stats.isDirectory()) {
        return mhtml.extract(program.extract, output, reportError, program.force);
      }

      glob(path.join(program.extract, '*.{mht,mhtml}'), function (err, files) {

        if (err) return reportError(err);

        // each archive gets its own folder, named after the archive
        files.forEach(function (file) {
          mhtml.extract(file, getdir(file, outputBase), reportError, program.force);
        });
      });
    });
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
'use strict'; | ||
|
||
var fs = require('fs'); | ||
var readline = require('readline'); | ||
var stream = require('stream'); | ||
var mkdir = require('mkdirp'); | ||
var rmdir = require('rimraf'); | ||
var path = require('path'); | ||
var EOL = require('os').EOL; | ||
var Part = require(__dirname + '/part'); | ||
|
||
module.exports = extractMHTML; | ||
|
||
/**
 * State holder for the extraction of one MHTML archive.
 *
 * @constructor
 * @param {string} source - path of the archive to read
 * @param {*} [opts] - accepted but currently unused
 */
function MHTMLExtractor(source, opts) {
  // multipart boundary string; stays null until one has been found
  this.boundary = null;
  this.source = source;
  // parts collected from the archive
  this.parts = [];
}
|
||
MHTMLExtractor.prototype = {

  /**
   * Read the source file line by line and collect its MIME parts in
   * `this.parts`. The first quoted `boundary="..."` value found becomes
   * `this.boundary`; every line starting with `--<boundary>` opens a new part
   * whose `Content-*` headers are stored (lower-cased) in `part.meta` and
   * whose non-empty body lines are appended to `part.content`.
   *
   * @param {function(?Error)} cb - called exactly once: with the stream error
   *   when the file cannot be read, with `Error('Invalid MHTML file')` when no
   *   `Content-Type: multipart/related` line was seen, otherwise with `null`
   */
  getParts: function (cb) {

    var self = this;
    var instream = fs.createReadStream(this.source);
    var outstream = new stream;
    var rl = readline.createInterface(instream, outstream);

    var validMhtml = false;
    var part = null;
    var readMeta = false;
    var readContent = false;
    var finished = false;

    // both the stream error and the readline close event can fire; report once
    function finish(err) {
      if (finished) return;
      finished = true;
      cb(err);
    }

    // without this handler a missing / unreadable file is an uncaught exception
    instream.on('error', function (err) {
      finish(err);
      rl.close();
    });

    rl.on('line', function readLine(line) {

      var boundary = line.match(/boundary=["'](.+?)["']/);
      var delimiter;
      var meta;

      if (line.indexOf('Content-Type: multipart/related') !== -1) {
        validMhtml = true;
      }

      // only the first boundary found is used
      if (!self.boundary && boundary) {
        self.boundary = boundary[1];
      }

      // Literal prefix comparison: boundaries may contain characters that are
      // special in a regular expression, and nothing can be a delimiter before
      // a boundary is known.
      delimiter = self.boundary ? '--' + self.boundary : null;

      if (delimiter && line.indexOf(delimiter) === 0) {

        // "--<boundary>--" closes the archive; it does not open another part
        if (line.slice(delimiter.length).trim() === '--') {
          readMeta = false;
          readContent = false;
          part = null;
          return;
        }

        readMeta = true;
        readContent = false;
        part = new Part();
      }

      if (readMeta) {

        // an empty line after the meta block signifies the start of the content block
        if (line === '') {
          readMeta = false;
          readContent = true;
          self.parts.push(part);
        } else {
          meta = line.match(/^(Content-[A-Za-z-]+):(?:\s+)?(.*)/i);
          if (meta) {
            part.meta[meta[1].toLowerCase()] = meta[2];
          }
        }
      }

      // empty lines are not part of the stored content
      if (readContent && line !== '') {
        part.content += line + EOL;
      }
    });

    rl.on('close', function endRead() {
      finish(validMhtml ? null : new Error('Invalid MHTML file'));
    });
  },

  /**
   * Write every collected part into `dest`, creating the folder first.
   *
   * @param {string} dest - output folder
   * @param {function(?Error)} cb - called exactly once: with the first error
   *   encountered, or with `null` once every part has been written (also when
   *   there are no parts at all)
   * @param {boolean} [force] - when truthy, `dest` is deleted before extracting
   */
  extractParts: function (dest, cb, force) {

    // TODO: return an error when dest exists and force is not set
    // TODO: allow mode to be passed as an argument
    // TODO: save assets in subfolder?

    var parts = this.parts;
    var remaining = parts.length;
    var finished = false;

    // several writes can fail independently; only the first outcome is reported
    function finish(err) {
      if (finished) return;
      finished = true;
      cb(err || null);
    }

    function writePart(part) {

      try {

        var filePath = path.join(dest, part.filename());

        part.decoded(function (err, content) {

          if (err) return finish(err);

          fs.writeFile(filePath, content, function (err) {
            if (err) return finish(err);
            remaining--;
            if (remaining === 0) finish(null);
          });
        });

      } catch (err) {
        // An exception after the outcome was reported came from the caller's
        // callback and must not be swallowed here.
        if (finished) throw err;
        finish(err);
      }
    }

    function extract(err) {

      // failure to delete the existing output folder
      if (err) return finish(err);

      mkdir(dest, function (err) {

        if (err) return finish(err);
        if (remaining === 0) return finish(null);

        parts.forEach(function (part) {
          writePart(part);
        });
      });
    }

    if (force) {
      rmdir(dest, extract);
    } else {
      extract();
    }
  },

  /**
   * Parse the source archive, then write its parts into `dest`.
   *
   * @param {string} dest - output folder
   * @param {function(?Error)} cb - called with the first error, or a falsy
   *   value on success
   * @param {boolean} [force=false] - delete `dest` before extracting
   */
  extract: function (dest, cb, force) {

    var self = this;
    var shouldForce = (typeof force === 'undefined') ? false : force;

    this.getParts(function readFile(err) {
      if (err) return cb(err);
      self.extractParts(dest, function extractParts(err) {
        cb(err);
      }, shouldForce);
    });
  }
};
|
||
/**
 * Module entry point: extract the MHTML archive at `source` into the folder
 * `destination` using a fresh MHTMLExtractor.
 *
 * @param {string} source - path of the archive
 * @param {string} destination - output folder
 * @param {function} cb - completion callback, handed to the extractor
 * @param {boolean} [force] - handed to the extractor unchanged
 */
function extractMHTML(source, destination, cb, force) {
  var extractor = new MHTMLExtractor(source);
  extractor.extract(destination, cb, force);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
'use strict'; | ||
|
||
var path = require('path'); | ||
var mimelib = require('mimelib') | ||
var cheerio = require('cheerio'); | ||
|
||
module.exports = Part; | ||
|
||
/**
 * One part of an MHTML archive. Starts out empty; the owner fills in the
 * fields.
 *
 * @constructor
 */
function Part() {
  // header values, looked up by header name (e.g. 'content-type')
  this.meta = {};
  // body text of the part, still in its transfer encoding
  this.content = '';
}
|
||
Part.prototype = {

  /**
   * Decoders keyed by lower-case Content-Transfer-Encoding value. Each takes
   * the encoded body text and returns the decoded content.
   */
  decoder: {
    base64: function (encoded) {
      // Buffer.from replaces the deprecated `new Buffer(string, encoding)`
      return Buffer.from(encoded, 'base64');
    },
    'quoted-printable': mimelib.decodeQuotedPrintable
  },

  /**
   * File name to save this part under: the last path segment of the
   * Content-Location header when there is one, otherwise a random number plus
   * the extension mimelib associates with the content type (if any).
   *
   * @returns {string}
   */
  filename: function () {

    if (this.meta.hasOwnProperty('content-location')) {
      var name = path.basename(this.meta['content-location'].replace(/\\/g, '/'));
      // a location without a usable last segment falls through to a generated name
      if (name) return name;
    }

    // TODO: use content-id if present?
    // Parameters such as "; charset=utf-8" are not part of the lookup key.
    var type = (this.meta['content-type'] || '').split(';')[0].trim().toLowerCase();
    var ext = (mimelib.contentTypesReversed.hasOwnProperty(type))
      ? '.' + mimelib.contentTypesReversed[type] : '';

    return Math.floor(Math.random() * 5000000000) + ext;
  },

  /**
   * Decode the content according to its Content-Transfer-Encoding. In HTML
   * parts, every `file:` URL found in an `href` attribute is replaced
   * throughout the document by its last path segment.
   *
   * @param {function(?Error, (string|Buffer)=)} cb - called once, with an
   *   error (e.g. unsupported transfer encoding) or with the decoded content
   */
  decoded: function (cb) {

    // RFC 2045: a part without the header is 7bit, i.e. not encoded
    var encoding = (this.meta['content-transfer-encoding'] || '7bit').trim().toLowerCase();
    var type = (this.meta['content-type'] || '').toLowerCase();
    var decoded;

    try {

      if (this.decoder.hasOwnProperty(encoding)) {
        decoded = this.decoder[encoding](this.content);
      } else if (/^(7bit|8bit|binary)$/.test(encoding)) {
        // identity encodings: the content is stored as-is
        decoded = this.content;
      } else {
        throw new Error('Unsupported Content-Transfer-Encoding: ' + encoding);
      }

      // TODO: make this optional
      if (type.indexOf('text/html') !== -1) {

        // base64 parts decode to a Buffer; links can only be rewritten in text
        var html = Buffer.isBuffer(decoded) ? decoded.toString() : decoded;
        var rewritten = html;
        var $ = cheerio.load(html);

        $('[href]').each(function () {

          var href = $(this).attr('href');

          if (/^file:/.test(href)) {
            // literal replacement of every occurrence; the URL is not a pattern
            rewritten = rewritten.split(href).join(href.match(/[^/]*$/)[0]);
          }
        });

        // keep the decoder's output untouched when nothing had to change
        if (rewritten !== html) decoded = rewritten;
      }

    } catch (err) {
      return cb(err);
    }

    cb(null, decoded);
  }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
|
||
// Public API of the package: `extract` is whatever lib/extract exports.
var extract = require(__dirname + '/lib/extract');

module.exports.extract = extract;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
{ | ||
"name": "mhtml", | ||
"version": "0.0.1", | ||
"description": "Convert MHTML files to HTML.", | ||
"main": "mhtml.js", | ||
"directories": { | ||
"example": "example" | ||
}, | ||
"scripts": { | ||
"test": "make test" | ||
}, | ||
"repository": { | ||
"type": "git", | ||
"url": "tba" | ||
}, | ||
"keywords": [ | ||
"convert", | ||
"mhtml", | ||
"html" | ||
], | ||
"author": "Bala Clark <balaclark@gmail.com>", | ||
"license": "MIT", | ||
"dependencies": { | ||
"mimelib": "~0.2.14", | ||
"mkdirp": "~0.3.5", | ||
"cheerio": "~0.12.4", | ||
"commander": "~2.0.0", | ||
"glob": "~3.2.7", | ||
"rimraf": "~2.2.2" | ||
}, | ||
"devDependencies": { | ||
"mocha": "~1.14.0", | ||
"should": "~2.1.0", | ||
"fs-extra": "~0.8.1", | ||
"istanbul": "~0.1.45" | ||
} | ||
} |
Oops, something went wrong.