<?php
/**
 * Small parser for raw e-mail messages (headers + MIME body).
 *
 * The constructor splits the raw message into a lower-cased header map and
 * the list of body lines; the getters then decode individual pieces
 * (subject, recipients, plain-text or HTML body) to UTF-8.
 */
class PlancakeEmailParser
{
    const PLAINTEXT = 1;
    const HTML = 2;

    /**
     * True when the IMAP extension is loaded (used to decode the subject).
     *
     * @var boolean
     */
    private $isImapExtensionAvailable = false;

    /**
     * The message exactly as handed to the constructor.
     *
     * @var string
     */
    private $emailRawContent;

    /**
     * Header values keyed by lower-cased header name.
     *
     * @var array
     */
    protected $rawFields = array();

    /**
     * Lines following the headers (starting with the blank separator line).
     *
     * @var array of string (each element is a line)
     */
    protected $rawBodyLines = array();

    /**
     * @param string $emailRawContent the complete raw message
     */
    public function __construct($emailRawContent)
    {
        $this->emailRawContent = (string) $emailRawContent;
        $this->extractHeadersAndRawBody();
        $this->isImapExtensionAvailable = function_exists('imap_open');
    }

    /**
     * Splits the raw message into $rawFields (headers) and $rawBodyLines.
     *
     * Header parsing stops at the first empty line. Lines that do not start
     * with a letter are treated as continuations of the previous header and
     * are appended to it without their first character.
     */
    private function extractHeadersAndRawBody()
    {
        $lines = preg_split("/(\r?\n|\r)/", $this->emailRawContent);
        $currentHeader = '';

        foreach ($lines as $i => $line) {
            if (self::isNewLine($line)) {
                // end of headers: everything from here on is the body
                $this->rawBodyLines = array_slice($lines, $i);
                break;
            }

            if ($this->isLineStartingWithPrintableChar($line)) {
                // start of a new header
                if (!preg_match('/^([^:]+): ?(.*)$/', $line, $matches)) {
                    // no colon on the line: not a header, and following
                    // continuation lines must not be glued to the previous one
                    $currentHeader = '';
                    continue;
                }
                $currentHeader = strtolower($matches[1]);
                $this->rawFields[$currentHeader] = $matches[2];
            } elseif ($currentHeader) {
                // more lines related to the current header
                $this->rawFields[$currentHeader] .= substr($line, 1);
            }
        }
    }

    /**
     * Returns the decoded subject.
     *
     * @return string (in UTF-8 format)
     * @throws Exception if a subject header is not found
     */
    public function getSubject()
    {
        if (!isset($this->rawFields['subject'])) {
            throw new Exception("Couldn't find the subject of the email");
        }

        $rawSubject = $this->rawFields['subject'];

        if ($this->isImapExtensionAvailable) {
            $ret = '';
            $parts = imap_mime_header_decode($rawSubject);
            if (is_array($parts)) {
                // the subject can consist of several encoded chunks
                foreach ($parts as $h) {
                    $charset = ($h->charset == 'default') ? 'US-ASCII' : $h->charset;
                    $converted = iconv($charset, 'UTF-8//TRANSLIT', $h->text);
                    // keep the undecoded chunk rather than dropping it when iconv fails
                    $ret .= ($converted === false) ? $h->text : $converted;
                }
            }
            return $ret;
        }

        // Ask iconv for UTF-8 output directly. Re-encoding the result with
        // utf8_encode() would double-encode it whenever iconv's internal
        // encoding already is UTF-8.
        $decoded = iconv_mime_decode($rawSubject, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, 'UTF-8');

        return ($decoded === false) ? $rawSubject : $decoded;
    }

    /**
     * Returns the comma-separated entries of the Cc header.
     *
     * @return array empty when there is no Cc header
     */
    public function getCc()
    {
        if (!isset($this->rawFields['cc'])) {
            return array();
        }

        return explode(',', $this->rawFields['cc']);
    }

    /**
     * Returns the comma-separated entries of the To header.
     *
     * @return array
     * @throws Exception if a to header is not found or if there are no recipient
     */
    public function getTo()
    {
        // the header value is a string, so emptiness is tested on the text
        // itself (count() on a string is a TypeError on PHP 8)
        if (!isset($this->rawFields['to']) || trim($this->rawFields['to']) === '') {
            throw new Exception("Couldn't find the recipients of the email");
        }

        return explode(',', $this->rawFields['to']);
    }

    /**
     * Returns the body part matching the requested content type, decoded
     * according to its Content-Transfer-Encoding and converted to UTF-8.
     *
     * If no part with the requested content type is found, the whole raw
     * body is returned (the content type was probably declared in the
     * top-level headers).
     *
     * Example of an email body:
     *
     *   --0016e65b5ec22721580487cb20fd
     *   Content-Type: text/plain; charset=ISO-8859-1
     *
     *   Hi all. I am new to Android development.
     *   Please help me.
     *   --0016e65b5ec22721580487cb20fd
     *   Content-Type: text/html; charset=ISO-8859-1
     *   ...
     *   --0016e65b5ec22721580487cb20fd--
     *
     * @param int $returnType self::PLAINTEXT or self::HTML
     * @return string - UTF8 encoded
     */
    public function getBody($returnType = self::PLAINTEXT)
    {
        $body = '';
        $detectedContentType = false;
        $contentTransferEncoding = null;
        $charset = 'ASCII';
        $waitingForContentStart = true;

        $contentTypeRegex = ($returnType == self::HTML)
            ? '/^Content-Type: ?text\/html/i'
            : '/^Content-Type: ?text\/plain/i';

        // there could be more than one boundary
        $boundaries = $this->extractBoundaries();

        foreach ($this->rawBodyLines as $line) {
            if (!$detectedContentType) {
                // still looking for the part with the requested content type
                if (preg_match($contentTypeRegex, $line)) {
                    $detectedContentType = true;
                }
                $charset = $this->detectCharset($line, $charset);
            } elseif ($waitingForContentStart) {
                // inside the headers of the wanted part
                $charset = $this->detectCharset($line, $charset);
                if ($contentTransferEncoding === null
                    && preg_match('/^Content-Transfer-Encoding: ?(.*)/i', $line, $matches)
                ) {
                    $contentTransferEncoding = strtoupper(trim($matches[1]));
                }
                if (self::isNewLine($line)) {
                    $waitingForContentStart = false;
                }
            } else {
                // collecting the actual content until we find a delimiter
                if ($this->isBoundaryLine($line, $boundaries)) {
                    break;
                }
                $body .= $line . "\n";
            }
        }

        if (!$detectedContentType) {
            // if here, we missed the requested content-type (probably it was
            // in the header), thus we assume the whole body is what we are after
            $body = implode("\n", $this->rawBodyLines);
        }

        // removing trailing new lines
        $body = rtrim($body, "\r\n");

        if ($contentTransferEncoding === 'BASE64') {
            $decoded = base64_decode($body, true);
            if ($decoded !== false) {
                // keep the undecoded text rather than returning false on malformed input
                $body = $decoded;
            }
        } elseif ($contentTransferEncoding === 'QUOTED-PRINTABLE') {
            $body = quoted_printable_decode($body);
        }

        if ($charset !== 'UTF-8') {
            $converted = iconv($charset, 'UTF-8//TRANSLIT', $body);
            if ($converted === false) {
                // iconv returns false on failure: fall back to treating the
                // bytes as ISO-8859-1 (what utf8_encode() does) while keeping
                // the original body as input
                $converted = iconv('ISO-8859-1', 'UTF-8', $body);
            }
            if ($converted !== false) {
                $body = $converted;
            }
        }

        return $body;
    }

    /**
     * Collects every boundary parameter declared anywhere in the raw message.
     *
     * Surrounding quotes are removed; anything after the boundary value on
     * the same line (further parameters, a stray carriage return) is ignored.
     *
     * @return array of string
     */
    private function extractBoundaries()
    {
        $boundaries = array();

        if (preg_match_all('/boundary\s*=\s*("[^"\r\n]*"|[^\s;]+)/i', $this->emailRawContent, $matches)) {
            foreach ($matches[1] as $value) {
                // sometimes boundaries are delimited by quotes - we want to remove them
                $value = trim($value, "\"'");
                if ($value !== '') {
                    $boundaries[] = $value;
                }
            }
        }

        return $boundaries;
    }

    /**
     * Tells whether a body line is a MIME delimiter for one of the boundaries.
     *
     * If the boundary is AAAAA, a part delimiter is --AAAAA and the closing
     * delimiter is --AAAAA--; both are recognised.
     *
     * @param string $line
     * @param array $boundaries
     * @return boolean
     */
    private function isBoundaryLine($line, array $boundaries)
    {
        if (strncmp($line, '--', 2) !== 0) {
            return false;
        }

        $candidate = rtrim((string) substr($line, 2));
        if (in_array($candidate, $boundaries, true)) {
            return true;
        }

        return substr($candidate, -2) === '--'
            && in_array((string) substr($candidate, 0, -2), $boundaries, true);
    }

    /**
     * Extracts an upper-cased charset name from a line, if it declares one.
     *
     * Only the charset token itself is captured, so trailing parameters such
     * as "; format=flowed" never end up in the name passed to iconv.
     *
     * @param string $line
     * @param string $current charset to return when the line declares none
     * @return string
     */
    private function detectCharset($line, $current)
    {
        if (preg_match('/charset\s*=\s*["\']?([^"\'\s;]+)/i', $line, $matches)) {
            return strtoupper($matches[1]);
        }

        return $current;
    }

    /**
     * @return string - UTF8 encoded
     */
    public function getPlainBody()
    {
        return $this->getBody(self::PLAINTEXT);
    }

    /**
     * @return string - UTF8 encoded
     */
    public function getHTMLBody()
    {
        return $this->getBody(self::HTML);
    }

    /**
     * N.B.: if the header doesn't exist an empty string is returned
     *
     * @param string $headerName - the header we want to retrieve (case-insensitive)
     * @return string - the value of the header
     */
    public function getHeader($headerName)
    {
        $headerName = strtolower($headerName);
        if (isset($this->rawFields[$headerName])) {
            return $this->rawFields[$headerName];
        }

        return '';
    }

    /**
     * Tells whether a line is empty once CR and LF characters are removed.
     *
     * @param string $line
     * @return boolean
     */
    public static function isNewLine($line)
    {
        return str_replace(array("\r", "\n"), '', $line) === '';
    }

    /**
     * Tells whether a line starts with an ASCII letter, which is how the
     * header parser recognises the start of a new header.
     *
     * @param string $line
     * @return boolean
     */
    private function isLineStartingWithPrintableChar($line)
    {
        return preg_match('/^[A-Za-z]/', $line) === 1;
    }
}
// Demo: parse a sample multipart message whose text/plain part is BASE64-encoded
// and print the decoded plain-text body.
$rawEmail = "From 8083199272@mms.att.net Tue Jul 03 19:10:49 2018\nReceived: from stcotaapp-apps-sfm1a.mobile.att.net ([166.216.152.37]:50252 helo=stcceg-mtmta01.wnsnet.attws.com)\n by gator3049.hostgator.com with esmtps (TLSv1:DHE-RSA-AES128-SHA:128)\n (Exim 4.91)\n (envelope-from <8083199272@mms.att.net>)\n id 1faVNh-002Xxu-GQ\n for bosque@firepage.org; Tue, 03 Jul 2018 19:10:49 -0500\nReceived: from alnnms01 ([107.79.70.30])\n by stcceg-mtmta01.wnsnet.attws.com with bizsmtp\n id 69421y01Y0fBW5X01QAe4c; Tue, 03 Jul 2018 19:10:38 -0500\nMessage-ID: <69421y01Y0fBW5X01QAe4c@txt.att.net>\nIn-Reply-To: 1383951446.40200251530663038311.JavaMail.nems@alnnms01\nX-Mms-Message-Type: m-send-req\nX-Mms-Transaction-Id: T16462a1a936\nX-Mms-MMS-Version: 1.2\nTo: bosque@firepage.org\nX-Mms-Message-Class: Personal\nX-Mms-Priority: Normal\nX-Mms-Delivery-Report: No\nX-Mms-Read-Reply: No\nFrom: 8083199272@mms.att.net\nDate: Tue, 3 Jul 2018 19:10:38 -0500 (CDT)\nX-Mms-Sender-Visibility: Show\nContent-Type: multipart/mixed; \n boundary=\"----=_Part_1669496_1731281007.1530663038311\"\nMIME-Version: 1.0\nDKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=mms.att.net;\n s=EMG20171113; t=1530663038;\n bh=+kwzoNYvPgMRHfPJi5CwsNVIk5NlSAOY1AHbjrbjBAI=;\n h=In-Reply-To:To:From:Date;\n b=nMIh249YCq/Er2msND65RSC4HgQpbI3KtVOJ0CvZO3rud6mxY44a17RdeRpwLa3YT\n QPWxVW8QVZtD8/Bc4Tpf2DqGjOsR92pQRP9w79zXSssG1pZ0vgDeLldhF92hkj3n/5\n dG2/1jOaMqO4MvIhyX4U5+dp8EI/2xPbrkAu1Wm9LmJuMCJ3eGckyi7Zfk3x4+P1kx\n W5KEBEOi0FPyHIJD/pTmZJe5n179CUFToz+CKgjtNeFH+kkak8NUUx+GCHgL49030A\n jbw53rDN+Z2g9Ts4bYx63Ywf1zX0ZRo0gwXAFTeTY6KbGgJcXNVfWxVVVY27GGtRO6\n xExmhRGYpR6Tg==\n\n------=_Part_1669496_1731281007.1530663038311\nContent-Type: text/plain; charset=UTF-8\nContent-ID: <text_1530663119159.txt>\nContent-Location: text_1530663119159.txt\nContent-Transfer-Encoding: BASE64\n\nVGVzdCBwYWdlIGltIGRyaXZpbmcgdGVzdCBwYWdl\n------=_Part_1669496_1731281007.1530663038311--\n\n\n";
$emailParser = new PlancakeEmailParser($rawEmail);
// Echo the body itself: wrapping print_r() in echo printed the body and then
// print_r()'s own return value ("1").
echo $emailParser->getPlainBody();
?>
preferences:
32.66 ms | 402 KiB | 5 Q