Skip to content

Commit 9dcd31c

Browse files
author
dan_plan
committed
first commit
0 parents  commit 9dcd31c

File tree

2 files changed

+292
-0
lines changed

2 files changed

+292
-0
lines changed

PlancakeEmailParser.php

Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
<?php
2+
3+
/*************************************************************************************
4+
* ===================================================================================*
5+
* Software by: Danyuki Software Limited *
6+
* This file is part of Plancake. *
7+
* *
8+
* Copyright 2009-2010-2011 by: Danyuki Software Limited *
9+
* Support, News, Updates at: http://www.plancake.com *
10+
* Licensed under the AGPL version 3 license. * *
11+
* Danyuki Software Limited is registered in England and Wales (Company No. 07554549) *
12+
**************************************************************************************
13+
* Plancake is distributed in the hope that it will be useful, *
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16+
* GNU Affero General Public License for more details. *
17+
* *
18+
* You should have received a copy of the GNU Affero General Public License *
19+
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
20+
* *
21+
**************************************************************************************/
22+
23+
/**
 * Extracts the headers and the text bodies of a raw email message.
 *
 * The Bcc header cannot be extracted because it is not part of the content
 * of the email.
 *
 * Known limitations (unchanged from the first version of this class):
 *  - body text is assumed to be ISO-8859-1; the charset declared in the
 *    Content-Type header is not honoured;
 *  - the plain body is always quoted-printable decoded and the HTML body is
 *    never transfer-decoded; Content-Transfer-Encoding is not honoured;
 *  - address lists are split on every comma, including commas inside quoted
 *    display names.
 *
 * @author dan
 */
class PlancakeEmailParser
{
    /**
     * The raw message exactly as it was passed to the constructor.
     *
     * @var string
     */
    private $emailRawContent;

    /**
     * Unfolded header values indexed by lower-cased header name.
     * When a header occurs several times, the last occurrence wins.
     *
     * @var array
     */
    protected $rawFields = array();

    /**
     * The lines that follow the blank line terminating the headers
     * (the separator itself is not included).
     *
     * @var array of string (each element is a line)
     */
    protected $rawBodyLines = array();

    /**
     * Parses the message immediately; the getters only read the parsed state.
     *
     * @param string $emailRawContent complete raw message (headers + body)
     */
    public function __construct($emailRawContent)
    {
        $this->emailRawContent = $emailRawContent;

        $this->extractHeadersAndRawBody();
    }

    /**
     * Splits the raw message into $rawFields and $rawBodyLines.
     *
     * Headers end at the first empty line. A line starting with a space or a
     * tab continues the previous header and is joined to it with one space.
     * Lines that are neither a "Name: value" header nor a continuation are
     * ignored instead of raising notices.
     */
    private function extractHeadersAndRawBody()
    {
        // reset so that a message without headers or without a body still
        // leaves both properties as arrays
        $this->rawFields = array();
        $this->rawBodyLines = array();

        $lines = preg_split("/\r?\n/", (string) $this->emailRawContent);
        $currentHeader = null;

        foreach ($lines as $index => $line)
        {
            if (self::isNewLine($line))
            {
                // end of headers: everything after the separator is the body
                $this->rawBodyLines = array_slice($lines, $index + 1);
                break;
            }

            if ($this->isLineStartingWithPrintableChar($line)) // start of new header
            {
                if (!preg_match('/^([^:]+): ?(.*)$/', $line, $matches))
                {
                    // no colon: not a header. Forget the current header so
                    // that following continuation lines are not attached to
                    // an unrelated field.
                    $currentHeader = null;
                    continue;
                }

                $currentHeader = strtolower(trim($matches[1]));
                $this->rawFields[$currentHeader] = $matches[2];
            }
            else if ($currentHeader !== null && preg_match('/^[ \t]/', $line))
            {
                // folded header: keep a separator between the two pieces so
                // that words and addresses are not glued together
                $continuation = ltrim($line, " \t");
                if ($continuation !== '')
                {
                    $this->rawFields[$currentHeader] .= ' ' . $continuation;
                }
            }
        }
    }

    /**
     * Returns the MIME-decoded subject.
     *
     * The decoder is asked for UTF-8 directly; re-encoding its output would
     * double-encode non-ASCII characters.
     *
     * @return string (in UTF-8 format); empty if the header cannot be decoded
     * @throws Exception if a subject header is not found
     */
    public function getSubject()
    {
        if (!isset($this->rawFields['subject']))
        {
            throw new Exception("Couldn't find the subject of the email");
        }

        $decoded = iconv_mime_decode($this->rawFields['subject'], 0, 'UTF-8');

        return ($decoded === false) ? '' : $decoded;
    }

    /**
     * Returns the Cc recipients, trimmed, without empty entries.
     *
     * @return array empty if there is no Cc header
     */
    public function getCc()
    {
        if (!isset($this->rawFields['cc']))
        {
            return array();
        }

        return self::splitAddressList($this->rawFields['cc']);
    }

    /**
     * Returns the To recipients, trimmed, without empty entries.
     *
     * @return array
     * @throws Exception if a to header is not found or if there are no recipient
     */
    public function getTo()
    {
        $recipients = isset($this->rawFields['to'])
            ? self::splitAddressList($this->rawFields['to'])
            : array();

        if (count($recipients) === 0)
        {
            throw new Exception("Couldn't find the recipients of the email");
        }

        return $recipients;
    }

    /**
     * Returns the content of the first text/plain part, or the whole body
     * when no such part is declared inside the body (typically a single-part
     * message whose Content-Type is in the top-level headers).
     *
     * Example of the structure that is looked for:
     *
     * --0016e65b5ec22721580487cb20fd
     * Content-Type: text/plain; charset=ISO-8859-1
     *
     * Hi all. I am new to Android development.
     * Please help me.
     *
     * --
     * My signature
     *
     * email: myemail@gmail.com
     * web: http://www.example.com
     *
     * --0016e65b5ec22721580487cb20fd
     * Content-Type: text/html; charset=ISO-8859-1
     *
     * @return string - UTF8 encoded, without trailing new lines
     */
    public function getPlainBody()
    {
        $part = $this->extractBodyPart('text/plain');
        $plainBody = $part[1];

        if (!$part[0])
        {
            // if here, we missed the text/plain content-type (it was probably
            // in the headers), thus we assume the whole body is plain text
            $plainBody = implode("\n", $this->rawBodyLines);
        }

        // removing trailing new lines
        $plainBody = rtrim($plainBody, "\r\n");

        return self::latin1ToUtf8(quoted_printable_decode($plainBody));
    }

    /**
     * Returns the content of the first text/html part, one "\n" after each
     * line, or an empty string when the body declares no such part.
     *
     * @return string - UTF8 encoded
     */
    public function getHTMLBody()
    {
        $part = $this->extractBodyPart('text/html');

        return self::latin1ToUtf8($part[1]);
    }

    /**
     * N.B.: if the header doesn't exist an empty string is returned
     *
     * @param string $headerName - the header we want to retrieve (case-insensitive)
     * @return string - the unfolded value of the header, not MIME-decoded
     */
    public function getHeader($headerName)
    {
        $headerName = strtolower($headerName);

        if (isset($this->rawFields[$headerName]))
        {
            return $this->rawFields[$headerName];
        }

        return '';
    }

    /**
     * Tells whether a line is empty once carriage returns and line feeds
     * are removed.
     *
     * @param string $line
     * @return boolean
     */
    public static function isNewLine($line)
    {
        return str_replace(array("\r", "\n"), '', (string) $line) === '';
    }

    /**
     * Tells whether a line starts with an ASCII letter, which is how the
     * first line of a header is recognised.
     *
     * @param string $line
     * @return boolean
     */
    private function isLineStartingWithPrintableChar($line)
    {
        return preg_match('/^[A-Za-z]/', $line) === 1;
    }

    /**
     * Splits a comma-separated address header into trimmed, non-empty entries.
     *
     * @param string $rawList
     * @return array of string
     */
    private static function splitAddressList($rawList)
    {
        $addresses = array();

        foreach (explode(',', (string) $rawList) as $address)
        {
            $address = trim($address);
            if ($address !== '')
            {
                $addresses[] = $address;
            }
        }

        return $addresses;
    }

    /**
     * Collects the lines of the first body part whose Content-Type header
     * starts with the given MIME type.
     *
     * The boundary of the part is the "--xxx" line opening the header block
     * that contains the Content-Type header, so other part headers may come
     * before Content-Type. Collection stops at the next occurrence of that
     * boundary, at its closing form ("--xxx--"), or at the end of the body
     * when no boundary was seen.
     *
     * @param string $mimeType e.g. 'text/plain'
     * @return array array(boolean $found, string $content); every collected
     *               line is followed by "\n"
     */
    private function extractBodyPart($mimeType)
    {
        // header names are case-insensitive
        $contentTypePattern = '/^Content-Type: ?' . preg_quote($mimeType, '/') . '/i';

        $boundary = '';
        $found = false;
        $collecting = false;
        $content = '';

        foreach ($this->rawBodyLines as $line)
        {
            if ($collecting)
            {
                // boundary lines may carry trailing padding
                $candidate = rtrim($line);
                if ($boundary !== ''
                    && ($candidate === $boundary || $candidate === $boundary . '--'))
                {
                    break;
                }
                $content .= $line . "\n";
            }
            else if ($found)
            {
                // skipping the remaining headers of the part
                if (self::isNewLine($line))
                {
                    $collecting = true;
                }
            }
            else if (preg_match($contentTypePattern, $line))
            {
                $found = true;
            }
            else if (self::isNewLine($line))
            {
                // end of a header block: the boundary seen before it does
                // not open the part we are looking for
                $boundary = '';
            }
            else if (strncmp($line, '--', 2) === 0 && strlen(rtrim($line)) > 2)
            {
                // a bare "--" is a signature separator, not a boundary
                $boundary = rtrim($line);
            }
        }

        return array($found, $content);
    }

    /**
     * Converts ISO-8859-1 text to UTF-8 (same mapping as the deprecated
     * utf8_encode()).
     *
     * @param string $text
     * @return string the input is returned untouched if the conversion fails
     */
    private static function latin1ToUtf8($text)
    {
        $converted = iconv('ISO-8859-1', 'UTF-8', (string) $text);

        return ($converted === false) ? (string) $text : $converted;
    }
}
269+
?>

README.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
*** PLANCAKE PHP EMAIL PARSER ***
2+
3+
This library allows you to easily parse an email given its content (headers + body).
4+
5+
Usage example:
6+
7+
8+
9+
$emailPath = "/var/mail/spool/dan/new/12323344323234234234";
10+
$emailParser = new PlancakeEmailParser(file_get_contents($emailPath));
11+
12+
$emailTo = $emailParser->getTo();
13+
$emailSubject = $emailParser->getSubject();
14+
$emailCc = $emailParser->getCc();
15+
$emailDeliveredToHeader = $emailParser->getHeader('Delivered-To');
16+
$emailBody = $emailParser->getPlainBody();
17+
18+
19+
20+
21+
Plancake homepage: http://www.plancake.com
22+
Support at: http://www.plancake.com/forums/forum/13/support-for-developers/
23+

0 commit comments

Comments
 (0)