00001 <?php
00002
00003
/**
 * Translates text written with backslash escape sequences into XML-safe markup.
 *
 * The class is a small hand-written tokenizer (advance()) plus a set of
 * parse routines that consume the token stream. All state is static, so
 * only one parse can be in progress at a time; parseString() resets the
 * state on every call.
 *
 * Escape sequences recognised by the tokenizer:
 *   \<  \>  \&      emitted as &lt; &gt; &amp;
 *   \(name)         emitted as the XML entity &name;
 *   \[ ... \]       literal string, emitted base64-encoded in a <span> CDATA section
 *   \{ ... \}       code block, emitted base64-encoded in a <p><pre> CDATA section
 *   \:              command (not supported yet, always reported as an error)
 *   \x              any other character x is emitted as-is
 * A bare '&' is rejected with an error.
 */
abstract class EscapeParser {
    /* Token types produced by advance(). The numeric values are kept as-is. */
    const EOT = 0;           // end of text
    const GT = 1;            // \>
    const LT = 2;            // \<
    const AMP = 3;           // \&
    const CB_START = 4;      // \{ (also a bare '{' at the start of a token)
    const CB_END = 5;        // \}
    const LIT_START = 6;     // \[
    const LIT_END = 7;       // \]
    const ENT_START = 9;     // \(
    const ENT_END = 10;      // )
    const CMD = 11;          // \:
    const ID = 12;           // not produced by the tokenizer in this class
    const ARG_START = 13;    // [
    const ARG_END = 14;      // ]
    const EQUAL = 15;        // =
    const COMMA = 16;        // ,
    const BLOCK_START = 17;  // not produced by the tokenizer in this class
    const BLOCK_END = 5;     // NOTE(review): same value as CB_END; unused here, confirm intent
    const NORMAL = 18;       // run of ordinary text
    const ENTITY = 19;       // text run made only of alphanumerics, '_', '#' and '$'
    const UNESC_ENTITY = 40; // bare '&'

    /** Text currently being parsed. */
    private static $source;
    /** Byte offset of the next character to tokenize. */
    private static $pos = 0;
    /** Type of the current token (one of the constants above). */
    private static $tokenType = self::EOT;
    /** Text of the current token. */
    private static $token = '';
    /** Current 1-based line number, used in error messages. */
    private static $curLine = 0;
    /** Byte offset at which the current line starts, used to compute columns. */
    private static $lineOffset = 0;
    /** Text of the token that preceded the current one. */
    private static $lastToken = '';
    /** Line on which the literal/code block being parsed started. */
    private static $blockStart = 0;
    /** Error messages collected during the current parse. */
    private static $errors = array();

    /** Unescaped characters that form a token on their own. */
    private static $singleCharTokens = array(
        '&' => self::UNESC_ENTITY,
        '[' => self::ARG_START,
        ']' => self::ARG_END,
        '{' => self::CB_START,
        ')' => self::ENT_END,
        ',' => self::COMMA,
        '=' => self::EQUAL,
    );

    /** Characters that have a special meaning when preceded by a backslash. */
    private static $escapeTokens = array(
        '<' => self::LT,
        '>' => self::GT,
        '&' => self::AMP,
        '{' => self::CB_START,
        '}' => self::CB_END,
        '[' => self::LIT_START,
        ']' => self::LIT_END,
        '(' => self::ENT_START,
        ':' => self::CMD,
    );

    /** Characters that terminate a run of ordinary text. */
    private static $textStops = "\\)],=&";

    /**
     * Parses a string and returns its translated form.
     *
     * @param string $str           text to parse
     * @param string $preAppendErr  prefix put in front of every error message
     * @return string|array the translated text on success, or the list of
     *                      (prefixed) error messages if parsing failed
     */
    public static function parseString($str, $preAppendErr = '') {
        // Cast so that the byte-offset access below always works on a string.
        self::$source = (string)$str;
        self::$pos = 0;
        self::$tokenType = self::EOT;
        self::$token = '';
        self::$curLine = 1;
        self::$lineOffset = 0;
        self::$lastToken = '';
        self::$blockStart = 0;
        self::$errors = array();

        if (self::advance()) {
            $ret = self::top();
            if ($ret !== false) {
                return $ret;
            }
        }

        $newErrors = array();
        foreach (self::$errors as $err) {
            $newErrors[] = $preAppendErr . $err;
        }
        return $newErrors;
    }

    /**
     * Reads the next token from the source into $token / $tokenType.
     *
     * @return bool false if the text ends right after a backslash (an error
     *              is recorded), true otherwise
     */
    private static function advance() {
        self::$lastToken = self::$token;
        $length = strlen(self::$source);

        if (self::$pos >= $length) {
            self::$tokenType = self::EOT;
            self::$token = '';
            self::$pos++;
            return true;
        }

        $chr = self::$source[self::$pos];

        if (isset(self::$singleCharTokens[$chr])) {
            self::$tokenType = self::$singleCharTokens[$chr];
            self::$token = $chr;
            self::$pos++;
            return true;
        }

        if ($chr !== '\\') {
            return self::scanText($length);
        }

        return self::scanEscape($length);
    }

    /**
     * Collects a run of ordinary text starting at the current position.
     *
     * The run ends at the end of the source or at one of the characters in
     * $textStops. The token is an ENTITY as long as it only contains
     * alphanumerics, '_', '#' or '$'; otherwise it is NORMAL.
     *
     * @param int $length length of the source in bytes
     * @return bool always true
     */
    private static function scanText($length) {
        self::$tokenType = self::ENTITY;
        self::$token = '';

        while (self::$pos < $length) {
            $chr = self::$source[self::$pos];
            if (strpos(self::$textStops, $chr) !== false) {
                break;
            }
            if (self::$tokenType == self::ENTITY && !ctype_alnum($chr)
                    && $chr !== '_' && $chr !== '#' && $chr !== '$') {
                self::$tokenType = self::NORMAL;
            }
            self::trackLineBreak(self::$pos);
            self::$token .= $chr;
            self::$pos++;
        }

        return true;
    }

    /**
     * Tokenizes an escape sequence; the current position is on the backslash.
     *
     * @param int $length length of the source in bytes
     * @return bool false if nothing follows the backslash, true otherwise
     */
    private static function scanEscape($length) {
        self::$pos++;
        if (self::$pos >= $length) {
            self::$errors[] = 'Unexpected end of text on line ' . self::$curLine;
            return false;
        }

        $chr = self::$source[self::$pos];
        if (isset(self::$escapeTokens[$chr])) {
            self::$tokenType = self::$escapeTokens[$chr];
            self::$token = '\\' . $chr;
        } else {
            // Any other escaped character stands for itself. A line break is
            // counted exactly once here (it used to be counted twice).
            self::trackLineBreak(self::$pos);
            self::$token = $chr;
            self::$tokenType = self::NORMAL;
        }
        self::$pos++;

        return true;
    }

    /**
     * Updates the line bookkeeping if the character at $index ends a line.
     *
     * "\n" and a lone "\r" each end a line. The "\r" of a "\r\n" pair is
     * skipped so that the pair is counted as a single line break.
     *
     * @param int $index byte offset of the character to inspect
     * @return void
     */
    private static function trackLineBreak($index) {
        $chr = self::$source[$index];
        if ($chr === "\r") {
            $next = $index + 1;
            if ($next < strlen(self::$source) && self::$source[$next] === "\n") {
                return;
            }
        } else if ($chr !== "\n") {
            return;
        }
        self::$curLine++;
        self::$lineOffset = $index + 1;
    }

    /**
     * Top-level parse routine: consumes tokens until the end of the text.
     *
     * @return string|false the translated text, or false on error
     */
    private static function top() {
        $collect = '';
        while (self::$tokenType != self::EOT) {
            switch (self::$tokenType) {
                case self::UNESC_ENTITY:
                    self::$errors[] = "On line " . self::$curLine . " [column " . (self::$pos - self::$lineOffset)
                        . "]: Regular XML entities not supported!";
                    return false;
                case self::GT:
                    $collect .= '&gt;';
                    if (!self::advance()) return false;
                    break;
                case self::LT:
                    $collect .= '&lt;';
                    if (!self::advance()) return false;
                    break;
                case self::AMP:
                    $collect .= '&amp;';
                    if (!self::advance()) return false;
                    break;
                case self::ENT_START:
                    if (!self::advance()) return false;
                    $ret = self::parseEntity();
                    if ($ret === false) return false;
                    $collect .= $ret;
                    break;
                case self::LIT_START:
                    if (!self::advance()) return false;
                    $ret = self::parseLiteral();
                    if ($ret === false) return false;
                    $collect .= $ret;
                    break;
                case self::CB_START:
                    if (!self::advance()) return false;
                    $ret = self::parseCodeBlock();
                    if ($ret === false) return false;
                    $collect .= $ret;
                    break;
                case self::CMD:
                    // Was a call to the non-existent advanced(), which caused a fatal error.
                    if (!self::advance()) return false;
                    $ret = self::parseCommand();
                    if ($ret === false) return false;
                    $collect .= $ret;
                    break;
                default:
                    $collect .= self::$token;
                    if (!self::advance()) return false;
                    break;
            }
        }

        return $collect;
    }

    /**
     * Parses the remainder of an entity; the current token follows '\('.
     *
     * @return string|false the XML entity ('&name;'), or false on error
     */
    private static function parseEntity() {
        if (self::$tokenType != self::ENTITY) {
            self::$errors[] = "On line " . self::$curLine . " [column " . (self::$pos - self::$lineOffset)
                . "]: Entity expected after '<tt>\\(</tt>' (invalid: '" . htmlspecialchars(self::$token) . "')";
            return false;
        }
        $entity = self::$token;
        if (!self::advance()) return false;
        if (self::$tokenType != self::ENT_END) {
            // After advance() the entity name is available as $lastToken.
            self::$errors[] = "On line " . self::$curLine . " [column " . (self::$pos - self::$lineOffset)
                . "]: Right parenthesis expected after entity ('<tt>\\(" . htmlspecialchars(self::$lastToken) . "</tt>')";
            return false;
        }
        if (!self::advance()) return false;
        return '&' . $entity . ';';
    }

    /**
     * Parses a literal string; the current token follows '\['.
     *
     * Every token up to the closing '\]' is concatenated verbatim.
     *
     * @return string|false a <span> holding the HTML-escaped, base64-encoded
     *                      text in a CDATA section, or false on error
     */
    private static function parseLiteral() {
        $collect = '';
        self::$blockStart = self::$curLine;
        while (self::$tokenType != self::LIT_END) {
            if (self::$tokenType == self::EOT) {
                self::$errors[] = 'Unclosed literal string started on line ' . self::$blockStart . " [column "
                    . (self::$pos - self::$lineOffset) . "] (missing '<tt>\\]</tt>')";
                return false;
            }
            $collect .= self::$token;
            if (!self::advance()) return false;
        }
        if (!self::advance()) return false;
        return '<span><![CDATA[' . base64_encode(htmlspecialchars($collect)) . ']]></span>';
    }

    /**
     * Parses a code block; the current token follows the block opener.
     *
     * Every token up to the closing '\}' is concatenated verbatim.
     *
     * @return string|false a <p><pre> holding the HTML-escaped,
     *                      base64-encoded text in a CDATA section, or false
     *                      on error
     */
    private static function parseCodeBlock() {
        $collect = '';
        self::$blockStart = self::$curLine;
        while (self::$tokenType != self::CB_END) {
            if (self::$tokenType == self::EOT) {
                self::$errors[] = 'Unclosed code block started on line ' . self::$blockStart . " [column "
                    . (self::$pos - self::$lineOffset) . "] (missing '<tt>\\}</tt>')";
                return false;
            }
            $collect .= self::$token;
            if (!self::advance()) return false;
        }
        if (!self::advance()) return false;
        return '<p><pre><![CDATA[' . base64_encode(htmlspecialchars($collect)) . ']]></pre></p>';
    }

    /**
     * Parses a command ('\:'). Commands are not implemented: this always
     * records an error.
     *
     * @return false
     */
    private static function parseCommand() {
        self::$errors[] = 'Commands not supported yet!';
        return false;
    }
}
00357
00358 ?>