<?php

declare(strict_types=1);

/**
 * Removes every <a> element whose text contains a double quote.
 *
 * Straight (") as well as curly (“ ”) double quotes are detected. Quotes that
 * only appear in attributes (e.g. href="...") do not count, because only the
 * element's text content is inspected.
 *
 * @param string $html UTF-8 encoded HTML fragment.
 *
 * @return string The serialized children of the parsed <body>, as UTF-8, with
 *                the offending anchors removed. Returns an empty string when
 *                the input is empty or the parser produced no <body>.
 */
function removeQuotedAnchors(string $html): string
{
    // DOMDocument::loadHTML() throws a ValueError when given an empty string.
    if ($html === '') {
        return '';
    }

    // Without an explicit charset libxml does not treat the input as UTF-8, so
    // non-ASCII characters are passed in as numeric entities. This replaces
    // mb_convert_encoding(..., 'HTML-ENTITIES', ...), deprecated since PHP 8.2.
    $encoded = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

    $dom = new DOMDocument();
    $dom->loadHTML($encoded, LIBXML_HTML_NODEFDTD); // flag: do not add a default DOCTYPE

    // getElementsByTagName() returns a live list; take a snapshot first so
    // that removing nodes does not disturb the iteration.
    foreach (iterator_to_array($dom->getElementsByTagName('a')) as $anchor) {
        if (preg_match('~["“”]~u', $anchor->textContent) === 1) {
            $anchor->parentNode?->removeChild($anchor);
        }
    }

    $body = $dom->getElementsByTagName('body')->item(0);
    if ($body === null) {
        return '';
    }

    // Serializing node by node skips the <html><body> wrapper the parser
    // added, so no regex is needed to strip it afterwards.
    $result = '';
    foreach ($body->childNodes as $child) {
        $result .= $dom->saveHTML($child);
    }

    return $result;
}

$html=<<<HTML
<a href="bla">123 "this" is asd</a>
<a href="bla">this should not be captured</a>
<a href="bla">no quotes in anchor text here</a>
<a href="bla">"445 is in quotes"</a>
<a href="bla">asd "blabla" sometimes</a>
<a href="bla">Je commence à avoir mal à la tête</a>
<a href="bla">something with quotes like “blabla” is bad</a>
HTML;

// Removed anchors leave their surrounding line breaks behind; collapse each
// run of line breaks into a single newline.
echo preg_replace('~\R+~', "\n", removeQuotedAnchors($html));
Deprecated: mb_convert_encoding(): Handling HTML entities via mbstring is deprecated; use htmlspecialchars, htmlentities, or mb_encode_numericentity/mb_decode_numericentity instead in /in/kXlLb on line 13
<a href="bla">this should not be captured</a>
<a href="bla">no quotes in anchor text here</a>
<a href="bla">Je commence à avoir mal à la tête</a>
Output for 8.3.5
Warning: PHP Startup: Unable to load dynamic library 'sodium.so' (tried: /usr/lib/php/8.3.5/modules/sodium.so (libsodium.so.23: cannot open shared object file: No such file or directory), /usr/lib/php/8.3.5/modules/sodium.so.so (/usr/lib/php/8.3.5/modules/sodium.so.so: cannot open shared object file: No such file or directory)) in Unknown on line 0
Deprecated: mb_convert_encoding(): Handling HTML entities via mbstring is deprecated; use htmlspecialchars, htmlentities, or mb_encode_numericentity/mb_decode_numericentity instead in /in/kXlLb on line 13
<a href="bla">this should not be captured</a>
<a href="bla">no quotes in anchor text here</a>
<a href="bla">Je commence à avoir mal à la tête</a>