3v4l.org

run code in 200+ php & hhvm versions
Bugs & Features
<?php class PlancakeEmailParser { const PLAINTEXT = 1; const HTML = 2; /** * * @var boolean */ private $isImapExtensionAvailable = false; /** * * @var string */ private $emailRawContent; /** * * @var associative array */ protected $rawFields; /** * * @var array of string (each element is a line) */ protected $rawBodyLines; /** * * @param string $emailRawContent */ public function __construct($emailRawContent) { $this->emailRawContent = $emailRawContent; $this->extractHeadersAndRawBody(); if (function_exists('imap_open')) { $this->isImapExtensionAvailable = true; } } private function extractHeadersAndRawBody() { $lines = preg_split("/(\r?\n|\r)/", $this->emailRawContent); $currentHeader = ''; $i = 0; foreach ($lines as $line) { if(self::isNewLine($line)) { // end of headers $this->rawBodyLines = array_slice($lines, $i); break; } if ($this->isLineStartingWithPrintableChar($line)) // start of new header { preg_match('/([^:]+): ?(.*)$/', $line, $matches); $newHeader = strtolower($matches[1]); $value = $matches[2]; $this->rawFields[$newHeader] = $value; $currentHeader = $newHeader; } else // more lines related to the current header { if ($currentHeader) { // to prevent notice from empty lines $this->rawFields[$currentHeader] .= substr($line, 1); } } $i++; } } /** * * @return string (in UTF-8 format) * @throws Exception if a subject header is not found */ public function getSubject() { if (!isset($this->rawFields['subject'])) { throw new Exception("Couldn't find the subject of the email"); } $ret = ''; if ($this->isImapExtensionAvailable) { foreach (imap_mime_header_decode($this->rawFields['subject']) as $h) { // subject can span into several lines $charset = ($h->charset == 'default') ? 'US-ASCII' : $h->charset; $ret .= iconv($charset, "UTF-8//TRANSLIT", $h->text); } } else { $ret = utf8_encode(iconv_mime_decode($this->rawFields['subject'])); } return $ret; } /** * * @return array */ public function getCc() { if (!isset($this->rawFields['cc'])) { return array(); } return explode(',', $this->rawFields['cc']); } /** * * @return array * @throws Exception if a to header is not found or if there are no recipient */ public function getTo() { if ( (!isset($this->rawFields['to'])) || (!count($this->rawFields['to']))) { throw new Exception("Couldn't find the recipients of the email"); } return explode(',', $this->rawFields['to']); } /** * return string - UTF8 encoded * * Example of an email body * --0016e65b5ec22721580487cb20fd Content-Type: text/plain; charset=ISO-8859-1 Hi all. I am new to Android development. Please help me. -- My signature email: myemail@gmail.com web: http://www.example.com --0016e65b5ec22721580487cb20fd Content-Type: text/html; charset=ISO-8859-1 */ public function getBody($returnType=self::PLAINTEXT) { $body = ''; $detectedContentType = false; $contentTransferEncoding = null; $charset = 'ASCII'; $waitingForContentStart = true; if ($returnType == self::HTML) $contentTypeRegex = '/^Content-Type: ?text\/html/i'; else $contentTypeRegex = '/^Content-Type: ?text\/plain/i'; // there could be more than one boundary preg_match_all('/boundary=(.*)/', $this->emailRawContent, $matches); $boundaries = $matches[1]; //preg_match('!boundary=(.*)$!mi', $this->emailRawContent, $matches); //*Removed 17Feb2017 MC //preg_match('/boundary=(.*)/', $this->emailRawContent, $matches); //Previous line was not catching boundaries properly //$boundary = str_replace(array("'", '"'), '', $matches[1]); // sometimes boundaries are delimited by quotes - we want to remove them foreach($boundaries as $i => $v) { $boundaries[$i] = str_replace(array("'", '"'), '', $v); } foreach ($this->rawBodyLines as $line) { if (!$detectedContentType) { if (preg_match($contentTypeRegex, $line, $matches)) { $detectedContentType = true; } if(preg_match('/charset=(.*)/i', $line, $matches)) { $charset = strtoupper(trim($matches[1], '"')); } } else if ($detectedContentType && $waitingForContentStart) { if(preg_match('/charset=(.*)/i', $line, $matches)) { $charset = strtoupper(trim($matches[1], '"')); } if ($contentTransferEncoding == null && preg_match('/^Content-Transfer-Encoding: ?(.*)/i', $line, $matches)) { $contentTransferEncoding = strtoupper($matches[1]); //MC 04Nov16: Added strtoupper } if (self::isNewLine($line)) { $waitingForContentStart = false; } } else { // ($detectedContentType && !$waitingForContentStart) // collecting the actual content until we find the delimiter // if the delimited is AAAAA, the line will be --AAAAA - that's why we use substr if (is_array($boundaries)) { if (in_array(substr($line, 2), $boundaries)) { // found the delimiter break; } } elseif (strpos($line, $boundary)) { break; } $body .= $line . "\n"; } } if (!$detectedContentType) { // if here, we missed the text/plain content-type (probably it was // in the header), thus we assume the whole body is what we are after $body = implode("\n", $this->rawBodyLines); } // removing trailing new lines $body = preg_replace('/((\r?\n)*)$/', '', $body); if ($contentTransferEncoding == 'BASE64') //MC 04Nov16: changed base64 to BASE64 $body = base64_decode($body, true); else if ($contentTransferEncoding == 'QUOTED-PRINTABLE') //MC 04Nov16: changed quoted-printable to QUOTED-PRINTABLE $body = quoted_printable_decode($body); if($charset != 'UTF-8') { // FORMAT=FLOWED, despite being popular in emails, it is not // supported by iconv $charset = str_replace("FORMAT=FLOWED", "", $charset); $body = iconv($charset, 'UTF-8//TRANSLIT', $body); if ($body === FALSE) { // iconv returns FALSE on failure $body = utf8_encode($body); } } return $body; } /** * @return string - UTF8 encoded * */ public function getPlainBody() { return $this->getBody(self::PLAINTEXT); } /** * return string - UTF8 encoded */ public function getHTMLBody() { return $this->getBody(self::HTML); } /** * N.B.: if the header doesn't exist an empty string is returned * * @param string $headerName - the header we want to retrieve * @return string - the value of the header */ public function getHeader($headerName) { $headerName = strtolower($headerName); if (isset($this->rawFields[$headerName])) { return $this->rawFields[$headerName]; } return ''; } /** * * @param string $line * @return boolean */ public static function isNewLine($line) { $line = str_replace("\r", '', $line); $line = str_replace("\n", '', $line); return (strlen($line) === 0); } /** * * @param string $line * @return boolean */ private function isLineStartingWithPrintableChar($line) { return preg_match('/^[A-Za-z]/', $line); } } $rawEmail = "From 8083199272@mms.att.net Tue Jul 03 19:10:49 2018\nReceived: from stcotaapp-apps-sfm1a.mobile.att.net ([166.216.152.37]:50252 helo=stcceg-mtmta01.wnsnet.attws.com)\n by gator3049.hostgator.com with esmtps (TLSv1:DHE-RSA-AES128-SHA:128)\n (Exim 4.91)\n (envelope-from <8083199272@mms.att.net>)\n id 1faVNh-002Xxu-GQ\n for bosque@firepage.org; Tue, 03 Jul 2018 19:10:49 -0500\nReceived: from alnnms01 ([107.79.70.30])\n by stcceg-mtmta01.wnsnet.attws.com with bizsmtp\n id 69421y01Y0fBW5X01QAe4c; Tue, 03 Jul 2018 19:10:38 -0500\nMessage-ID: <69421y01Y0fBW5X01QAe4c@txt.att.net>\nIn-Reply-To: 1383951446.40200251530663038311.JavaMail.nems@alnnms01\nX-Mms-Message-Type: m-send-req\nX-Mms-Transaction-Id: T16462a1a936\nX-Mms-MMS-Version: 1.2\nTo: bosque@firepage.org\nX-Mms-Message-Class: Personal\nX-Mms-Priority: Normal\nX-Mms-Delivery-Report: No\nX-Mms-Read-Reply: No\nFrom: 8083199272@mms.att.net\nDate: Tue, 3 Jul 2018 19:10:38 -0500 (CDT)\nX-Mms-Sender-Visibility: Show\nContent-Type: multipart/mixed; \n boundary=\"----=_Part_1669496_1731281007.1530663038311\"\nMIME-Version: 1.0\nDKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=mms.att.net;\n s=EMG20171113; t=1530663038;\n bh=+kwzoNYvPgMRHfPJi5CwsNVIk5NlSAOY1AHbjrbjBAI=;\n h=In-Reply-To:To:From:Date;\n b=nMIh249YCq/Er2msND65RSC4HgQpbI3KtVOJ0CvZO3rud6mxY44a17RdeRpwLa3YT\n QPWxVW8QVZtD8/Bc4Tpf2DqGjOsR92pQRP9w79zXSssG1pZ0vgDeLldhF92hkj3n/5\n dG2/1jOaMqO4MvIhyX4U5+dp8EI/2xPbrkAu1Wm9LmJuMCJ3eGckyi7Zfk3x4+P1kx\n W5KEBEOi0FPyHIJD/pTmZJe5n179CUFToz+CKgjtNeFH+kkak8NUUx+GCHgL49030A\n jbw53rDN+Z2g9Ts4bYx63Ywf1zX0ZRo0gwXAFTeTY6KbGgJcXNVfWxVVVY27GGtRO6\n xExmhRGYpR6Tg==\n\n------=_Part_1669496_1731281007.1530663038311\nContent-Type: text/plain; charset=UTF-8\nContent-ID: <text_1530663119159.txt>\nContent-Location: text_1530663119159.txt\nContent-Transfer-Encoding: BASE64\n\nVGVzdCBwYWdlIGltIGRyaXZpbmcgdGVzdCBwYWdl\n------=_Part_1669496_1731281007.1530663038311--\n\n\n"; $emailParser = new PlancakeEmailParser($rawEmail); echo print_r($emailParser->getPlainBody()); ?>
Output for 5.6.30, hhvm-3.18.5 - 3.22.0, 7.0.30 - 7.3.0beta1
1