Source for file stringparser.class.php

Documentation is available at stringparser.class.php

  1. <?php
  2. /**
  3. * Generic string parsing infrastructure
  4. *
  5. * These classes provide the means to parse any kind of string into a tree-like
  6. * memory structure. It would e.g. be possible to create an HTML parser based
  7. * upon this class.
  8. *
  9. * Version: 0.3.0
  10. *
  11. * @author Christian Seiler <spam@christian-seiler.de>
  12. * @copyright Christian Seiler 2006
  13. * @package stringparser
  14. *
  15. * This program is free software; you can redistribute it and/or modify
  16. * it under the terms of either:
  17. *
  18. * a) the GNU General Public License as published by the Free
  19. * Software Foundation; either version 1, or (at your option) any
  20. * later version, or
  21. *
  22. * b) the Artistic License as published by Larry Wall, either version 2.0,
  23. * or (at your option) any later version.
  24. *
  25. * This program is distributed in the hope that it will be useful,
  26. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  27. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See either
  28. * the GNU General Public License or the Artistic License for more details.
  29. *
  30. * You should have received a copy of the Artistic License with this Kit,
  31. * in the file named "Artistic.clarified". If not, I'll be glad to provide
  32. * one.
  33. *
  34. * You should also have received a copy of the GNU General Public License
  35. * along with this program in the file named "COPYING"; if not, write to
  36. * the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
  37. * MA 02111-1307, USA.
  38. */
  39.  
  40. /**
  41. * String parser mode: Search for the next character
  42. * @see StringParser::_parserMode
  43. */
  44. define ('STRINGPARSER_MODE_SEARCH', 1);
  45. /**
  46. * String parser mode: Look at each character of the string
  47. * @see StringParser::_parserMode
  48. */
  49. define ('STRINGPARSER_MODE_LOOP', 2);
  50. /**
  51. * Filter type: Prefilter
  52. * @see StringParser::addFilter, StringParser::_prefilters
  53. */
  54. define ('STRINGPARSER_FILTER_PRE', 1);
  55. /**
  56. * Filter type: Postfilter
  57. * @see StringParser::addFilter, StringParser::_postfilters
  58. */
  59. define ('STRINGPARSER_FILTER_POST', 2);
  60.  
  61. /**
  62. * Generic string parser class
  63. *
  64. * This is an abstract class for any type of string parser.
  65. *
  66. * @package stringparser
  67. */
  68. class StringParser {
  69. /**
  70. * String parser mode
  71. *
  72. * There are two possible modes: searchmode and loop mode. In loop mode
  73. * every single character is looked at in a loop and it is then decided
  74. * what action to take. This is the most straight-forward approach to
  75. * string parsing but due to the nature of PHP as a scripting language,
  76. * it can also cost performance. In search mode the class posseses a
  77. * list of relevant characters for parsing and uses the
  78. * {@link PHP_MANUAL#strpos strpos} function to search for the next
  79. * relevant character. The search mode will be faster than the loop mode
  80. * in most circumstances but it is also more difficult to implement.
  81. * The subclass that does the string parsing itself will define which
  82. * mode it will implement.
  83. *
  84. * @access protected
  85. * @var int
  86. * @see STRINGPARSER_MODE_SEARCH, STRINGPARSER_MODE_LOOP
  87. */
  88. var $_parserMode = STRINGPARSER_MODE_SEARCH;
  89. /**
  90. * Raw text
  91. * @access protected
  92. * @var string
  93. */
  94. var $_text = '';
  95. /**
  96. * Parse stack
  97. * @access protected
  98. * @var array
  99. */
  100. var $_stack = array ();
  101. /**
  102. * Current position in raw text
  103. * @access protected
  104. * @var integer
  105. */
  106. var $_cpos = -1;
  107. /**
  108. * Root node
  109. * @access protected
  110. * @var mixed
  111. */
  112. var $_root = null;
  113. /**
  114. * Length of the text
  115. * @access protected
  116. * @var integer
  117. */
  118. var $_length = -1;
  119. /**
  120. * Flag if this object is already parsing a text
  121. *
  122. * This flag is to prevent recursive calls to the parse() function that
  123. * would cause very nasty things.
  124. *
  125. * @access protected
  126. * @var boolean
  127. */
  128. var $_parsing = false;
  129. /**
  130. * Strict mode
  131. *
  132. * Whether to stop parsing if a parse error occurs.
  133. *
  134. * @access public
  135. * @var boolean
  136. */
  137. var $strict = false;
  138. /**
  139. * Characters or strings to look for
  140. * @access protected
  141. * @var array
  142. */
  143. var $_charactersSearch = array ();
  144. /**
  145. * Characters currently allowed
  146. *
  147. * Note that this will only be evaluated in loop mode; in search mode
  148. * this would ruin every performance increase. Note that only single
  149. * characters are permitted here, no strings. Please also note that in
  150. * loop mode, {@link StringParser::_charactersSearch _charactersSearch}
  151. * is evaluated before this variable.
  152. *
  153. * If in strict mode, parsing is stopped if a character that is not
  154. * allowed is encountered. If not in strict mode, the character is
  155. * simply ignored.
  156. *
  157. * @access protected
  158. * @var array
  159. */
  160. var $_charactersAllowed = array ();
  161. /**
  162. * Current parser status
  163. * @access protected
  164. * @var int
  165. */
  166. var $_status = 0;
  167. /**
  168. * Prefilters
  169. * @access protected
  170. * @var array
  171. */
  172. var $_prefilters = array ();
  173. /**
  174. * Postfilters
  175. * @access protected
  176. * @var array
  177. */
  178. var $_postfilters = array ();
  179. /**
  180. * Recently reparsed?
  181. * @access protected
  182. * @var bool
  183. */
  184. var $_recentlyReparsed = false;
  185. /**
  186. * Constructor
  187. *
  188. * @access public
  189. */
  190. function StringParser () {
  191. }
  192. /**
  193. * Add a filter
  194. *
  195. * @access public
  196. * @param int $type The type of the filter
  197. * @param mixed $callback The callback to call
  198. * @return bool
  199. * @see STRINGPARSER_FILTER_PRE, STRINGPARSER_FILTER_POST
  200. */
  201. function addFilter ($type, $callback) {
  202. // make sure the function is callable
  203. if (!is_callable ($callback)) {
  204. return false;
  205. }
  206. switch ($type) {
  207. case STRINGPARSER_FILTER_PRE:
  208. $this->_prefilters[] = $callback;
  209. break;
  210. case STRINGPARSER_FILTER_POST:
  211. $this->_postfilters[] = $callback;
  212. break;
  213. default:
  214. return false;
  215. }
  216. return true;
  217. }
  218. /**
  219. * Remove all filters
  220. *
  221. * @access public
  222. * @param int $type The type of the filter or 0 for all
  223. * @return bool
  224. * @see STRINGPARSER_FILTER_PRE, STRINGPARSER_FILTER_POST
  225. */
  226. function clearFilters ($type = 0) {
  227. switch ($type) {
  228. case 0:
  229. $this->_prefilters = array ();
  230. $this->_postfilters = array ();
  231. break;
  232. case STRINGPARSER_FILTER_PRE:
  233. $this->_prefilters = array ();
  234. break;
  235. case STRINGPARSER_FILTER_POST:
  236. $this->_postfilters = array ();
  237. break;
  238. default:
  239. return false;
  240. }
  241. return true;
  242. }
  243. /**
  244. * This function parses the text
  245. *
  246. * @access public
  247. * @param string $text The text to parse
  248. * @return mixed Either the root object of the tree if no output method
  249. * is defined, the tree reoutput to e.g. a string or false
  250. * if an internal error occured, such as a parse error if
  251. * in strict mode or the object is already parsing a text.
  252. */
  253. function parse ($text) {
  254. if ($this->_parsing) {
  255. return false;
  256. }
  257. $this->_parsing = true;
  258. $this->_text = $this->_applyPrefilters ($text);
  259. $this->_output = null;
  260. $this->_length = strlen ($this->_text);
  261. $this->_cpos = 0;
  262. unset ($this->_stack);
  263. $this->_stack = array ();
  264. if (is_object ($this->_root)) {
  265. StringParser_Node::destroyNode ($this->_root);
  266. }
  267. unset ($this->_root);
  268. $this->_root =& new StringParser_Node_Root ();
  269. $this->_stack[0] =& $this->_root;
  270. $this->_parserInit ();
  271. $finished = false;
  272. while (!$finished) {
  273. switch ($this->_parserMode) {
  274. case STRINGPARSER_MODE_SEARCH:
  275. $res = $this->_searchLoop ();
  276. if (!$res) {
  277. $this->_parsing = false;
  278. return false;
  279. }
  280. break;
  281. case STRINGPARSER_MODE_LOOP:
  282. $res = $this->_loop ();
  283. if (!$res) {
  284. $this->_parsing = false;
  285. return false;
  286. }
  287. break;
  288. default:
  289. $this->_parsing = false;
  290. return false;
  291. }
  292. $res = $this->_closeRemainingBlocks ();
  293. if (!$res) {
  294. if ($this->strict) {
  295. $this->_parsing = false;
  296. return false;
  297. } else {
  298. $res = $this->_reparseAfterCurrentBlock ();
  299. if (!$res) {
  300. $this->_parsing = false;
  301. return false;
  302. }
  303. continue;
  304. }
  305. }
  306. $finished = true;
  307. }
  308. $res = $this->_modifyTree ();
  309. if (!$res) {
  310. $this->_parsing = false;
  311. return false;
  312. }
  313. $res = $this->_outputTree ();
  314. if (!$res) {
  315. $this->_parsing = false;
  316. return false;
  317. }
  318. if (is_null ($this->_output)) {
  319. $root =& $this->_root;
  320. unset ($this->_root);
  321. $this->_root = null;
  322. while (count ($this->_stack)) {
  323. unset ($this->_stack[count($this->_stack)-1]);
  324. }
  325. $this->_stack = array ();
  326. $this->_parsing = false;
  327. return $root;
  328. }
  329. $res = StringParser_Node::destroyNode ($this->_root);
  330. if (!$res) {
  331. $this->_parsing = false;
  332. return false;
  333. }
  334. unset ($this->_root);
  335. $this->_root = null;
  336. while (count ($this->_stack)) {
  337. unset ($this->_stack[count($this->_stack)-1]);
  338. }
  339. $this->_stack = array ();
  340. $this->_parsing = false;
  341. return $this->_output;
  342. }
  343. /**
  344. * Apply prefilters
  345. *
  346. * It is possible to specify prefilters for the parser to do some
  347. * manipulating of the string beforehand.
  348. */
  349. function _applyPrefilters ($text) {
  350. foreach ($this->_prefilters as $filter) {
  351. if (is_callable ($filter)) {
  352. $ntext = call_user_func ($filter, $text);
  353. if (is_string ($ntext)) {
  354. $text = $ntext;
  355. }
  356. }
  357. }
  358. return $text;
  359. }
  360. /**
  361. * Apply postfilters
  362. *
  363. * It is possible to specify postfilters for the parser to do some
  364. * manipulating of the string afterwards.
  365. */
  366. function _applyPostfilters ($text) {
  367. foreach ($this->_postfilters as $filter) {
  368. if (is_callable ($filter)) {
  369. $ntext = call_user_func ($filter, $text);
  370. if (is_string ($ntext)) {
  371. $text = $ntext;
  372. }
  373. }
  374. }
  375. return $text;
  376. }
  377. /**
  378. * Abstract method: Manipulate the tree
  379. * @access protected
  380. * @return bool
  381. */
  382. function _modifyTree () {
  383. return true;
  384. }
  385. /**
  386. * Abstract method: Output tree
  387. * @access protected
  388. * @return bool
  389. */
  390. function _outputTree () {
  391. // this could e.g. call _applyPostfilters
  392. return true;
  393. }
  394. /**
  395. * Restart parsing after current block
  396. *
  397. * To achieve this the current top stack object is removed from the
  398. * tree. Then the current item
  399. *
  400. * @access protected
  401. * @return bool
  402. */
  403. function _reparseAfterCurrentBlock () {
  404. // this should definitely not happen!
  405. if (($stack_count = count ($this->_stack)) < 2) {
  406. return false;
  407. }
  408. $topelem =& $this->_stack[$stack_count-1];
  409. $node_parent =& $topelem->_parent;
  410. // remove the child from the tree
  411. $res = $node_parent->removeChild ($topelem, false);
  412. if (!$res) {
  413. return false;
  414. }
  415. $res = $this->_popNode ();
  416. if (!$res) {
  417. return false;
  418. }
  419. // now try to get the position of the object
  420. if ($topelem->occurredAt < 0) {
  421. return false;
  422. }
  423. // HACK: could it be necessary to set a different status
  424. // if yes, how should this be achieved? Another member of
  425. // StringParser_Node?
  426. $this->_setStatus (0);
  427. $res = $this->_appendText ($this->_text{$topelem->occurredAt});
  428. if (!$res) {
  429. return false;
  430. }
  431. $this->_cpos = $topelem->occurredAt + 1;
  432. $this->_recentlyReparsed = true;
  433. return true;
  434. }
  435. /**
  436. * Abstract method: Close remaining blocks
  437. * @access protected
  438. */
  439. function _closeRemainingBlocks () {
  440. // everything closed
  441. if (count ($this->_stack) == 1) {
  442. return true;
  443. }
  444. // not everything closed
  445. if ($this->strict) {
  446. return false;
  447. }
  448. while (count ($this->_stack) > 1) {
  449. $res = $this->_popNode ();
  450. if (!$res) {
  451. return false;
  452. }
  453. }
  454. return true;
  455. }
  456. /**
  457. * Abstract method: Initialize the parser
  458. * @access protected
  459. */
  460. function _parserInit () {
  461. $this->_setStatus (0);
  462. }
  463. /**
  464. * Abstract method: Set a specific status
  465. * @access protected
  466. */
  467. function _setStatus ($status) {
  468. if ($status != 0) {
  469. return false;
  470. }
  471. $this->_charactersSearch = array ();
  472. $this->_charactersAllowed = array ();
  473. $this->_status = $status;
  474. return true;
  475. }
  476. /**
  477. * Abstract method: Handle status
  478. * @access protected
  479. * @param int $status The current status
  480. * @param string $needle The needle that was found
  481. * @return bool
  482. */
  483. function _handleStatus ($status, $needle) {
  484. $this->_appendText ($needle);
  485. $this->_cpos += strlen ($needle);
  486. return true;
  487. }
  488. /**
  489. * Search mode loop
  490. * @access protected
  491. * @return bool
  492. */
  493. function _searchLoop () {
  494. $i = 0;
  495. while (1) {
  496. // make sure this is false!
  497. $this->_recentlyReparsed = false;
  498. list ($needle, $offset) = $this->_strpos ($this->_charactersSearch, $this->_cpos);
  499. // parser ends here
  500. if ($needle === false) {
  501. // original status 0 => no problem
  502. if (!$this->_status) {
  503. break;
  504. }
  505. // not in original status? strict mode?
  506. if ($this->strict) {
  507. return false;
  508. }
  509. // break up parsing operation of current node
  510. $res = $this->_reparseAfterCurrentBlock ();
  511. if (!$res) {
  512. return false;
  513. }
  514. continue;
  515. }
  516. // get subtext
  517. $subtext = substr ($this->_text, $this->_cpos, $offset - $this->_cpos);
  518. $res = $this->_appendText ($subtext);
  519. if (!$res) {
  520. return false;
  521. }
  522. $this->_cpos = $offset;
  523. $res = $this->_handleStatus ($this->_status, $needle);
  524. if (!$res && $this->strict) {
  525. return false;
  526. }
  527. if (!$res) {
  528. $res = $this->_appendText ($this->_text{$this->_cpos});
  529. if (!$res) {
  530. return false;
  531. }
  532. $this->_cpos++;
  533. continue;
  534. }
  535. if ($this->_recentlyReparsed) {
  536. $this->_recentlyReparsed = false;
  537. continue;
  538. }
  539. $this->_cpos += strlen ($needle);
  540. }
  541. // get subtext
  542. if ($this->_cpos < strlen ($this->_text)) {
  543. $subtext = substr ($this->_text, $this->_cpos);
  544. $res = $this->_appendText ($subtext);
  545. if (!$res) {
  546. return false;
  547. }
  548. }
  549. return true;
  550. }
  551. /**
  552. * Loop mode loop
  553. *
  554. * @access protected
  555. * @return bool
  556. */
  557. function _loop () {
  558. // HACK: This method ist not yet implemented correctly, the code below
  559. // DOES NOT WORK! Do not use!
  560. return false;
  561. /*
  562. while ($this->_cpos < $this->_length) {
  563. $needle = $this->_strDetect ($this->_charactersSearch, $this->_cpos);
  564. if ($needle === false) {
  565. // not found => see if character is allowed
  566. if (!in_array ($this->_text{$this->_cpos}, $this->_charactersAllowed)) {
  567. if ($strict) {
  568. return false;
  569. }
  570. // ignore
  571. continue;
  572. }
  573. // lot's of FIXMES
  574. $res = $this->_appendText ($this->_text{$this->_cpos});
  575. if (!$res) {
  576. return false;
  577. }
  578. }
  579. // get subtext
  580. $subtext = substr ($this->_text, $offset, $offset - $this->_cpos);
  581. $res = $this->_appendText ($subtext);
  582. if (!$res) {
  583. return false;
  584. }
  585. $this->_cpos = $subtext;
  586. $res = $this->_handleStatus ($this->_status, $needle);
  587. if (!$res && $strict) {
  588. return false;
  589. }
  590. }
  591. // original status 0 => no problem
  592. if (!$this->_status) {
  593. return true;
  594. }
  595. // not in original status? strict mode?
  596. if ($this->strict) {
  597. return false;
  598. }
  599. // break up parsing operation of current node
  600. $res = $this->_reparseAfterCurrentBlock ();
  601. if (!$res) {
  602. return false;
  603. }
  604. // this will not cause an infinite loop because
  605. // _reparseAfterCurrentBlock will increase _cpos by one!
  606. return $this->_loop ();
  607. */
  608. }
  609. /**
  610. * Abstract method Append text depending on current status
  611. * @access protected
  612. * @param string $text The text to append
  613. * @return bool On success, the function returns true, else false
  614. */
  615. function _appendText ($text) {
  616. if (!strlen ($text)) {
  617. return true;
  618. }
  619. // default: call _appendToLastTextChild
  620. return $this->_appendToLastTextChild ($text);
  621. }
  622. /**
  623. * Append text to last text child of current top parser stack node
  624. * @access protected
  625. * @param string $text The text to append
  626. * @return bool On success, the function returns true, else false
  627. */
  628. function _appendToLastTextChild ($text) {
  629. $scount = count ($this->_stack);
  630. if ($scount == 0) {
  631. return false;
  632. }
  633. return $this->_stack[$scount-1]->appendToLastTextChild ($text);
  634. }
  635. /**
  636. * Searches {@link StringParser::_text _text} for every needle that is
  637. * specified by using the {@link PHP_MANUAL#strpos strpos} function. It
  638. * returns an associative array with the key <code>'needle'</code>
  639. * pointing at the string that was found first and the key
  640. * <code>'offset'</code> pointing at the offset at which the string was
  641. * found first. If no needle was found, the <code>'needle'</code>
  642. * element is <code>false</code> and the <code>'offset'</code> element
  643. * is <code>-1</code>.
  644. *
  645. * @access protected
  646. * @param array $needles
  647. * @param int $offset
  648. * @return array
  649. * @see StringParser::_text
  650. */
  651. function _strpos ($needles, $offset) {
  652. $cur_needle = false;
  653. $cur_offset = -1;
  654. if ($offset < strlen ($this->_text)) {
  655. foreach ($needles as $needle) {
  656. $n_offset = strpos ($this->_text, $needle, $offset);
  657. if ($n_offset !== false && ($n_offset < $cur_offset || $cur_offset < 0)) {
  658. $cur_needle = $needle;
  659. $cur_offset = $n_offset;
  660. }
  661. }
  662. }
  663. return array ($cur_needle, $cur_offset, 'needle' => $cur_needle, 'offset' => $cur_offset);
  664. }
  665. /**
  666. * Detects a string at the current position
  667. *
  668. * @access protected
  669. * @param array $needles The strings that are to be detected
  670. * @param int $offset The current offset
  671. * @return mixed The string that was detected or the needle
  672. */
  673. function _strDetect ($needles, $offset) {
  674. foreach ($needles as $needle) {
  675. $l = strlen ($needle);
  676. if (substr ($this->_text, $offset, $l) == $needle) {
  677. return $needle;
  678. }
  679. }
  680. return false;
  681. }
  682. /**
  683. * Adds a node to the current parse stack
  684. *
  685. * @access protected
  686. * @param object $node The node that is to be added
  687. * @return bool True on success, else false.
  688. * @see StringParser_Node, StringParser::_stack
  689. */
  690. function _pushNode (&$node) {
  691. $stack_count = count ($this->_stack);
  692. $max_node =& $this->_stack[$stack_count-1];
  693. if (!$max_node->appendChild ($node)) {
  694. return false;
  695. }
  696. $this->_stack[$stack_count] =& $node;
  697. return true;
  698. }
  699. /**
  700. * Removes a node from the current parse stack
  701. *
  702. * @access protected
  703. * @return bool True on success, else false.
  704. * @see StringParser_Node, StringParser::_stack
  705. */
  706. function _popNode () {
  707. $stack_count = count ($this->_stack);
  708. unset ($this->_stack[$stack_count-1]);
  709. return true;
  710. }
  711. /**
  712. * Execute a method on the top element
  713. *
  714. * @access protected
  715. * @return mixed
  716. */
  717. function _topNode () {
  718. $args = func_get_args ();
  719. if (!count ($args)) {
  720. return; // oops?
  721. }
  722. $method = array_shift ($args);
  723. $stack_count = count ($this->_stack);
  724. $method = array (&$this->_stack[$stack_count-1], $method);
  725. if (!is_callable ($method)) {
  726. return; // oops?
  727. }
  728. return call_user_func_array ($method, $args);
  729. }
  730. /**
  731. * Get a variable of the top element
  732. *
  733. * @access protected
  734. * @return mixed
  735. */
  736. function _topNodeVar ($var) {
  737. $stack_count = count ($this->_stack);
  738. return $this->_stack[$stack_count-1]->$var;
  739. }
  740. }
  741.  
  742. /**
  743. * Node type: Unknown node
  744. * @see StringParser_Node::_type
  745. */
  746. define ('STRINGPARSER_NODE_UNKNOWN', 0);
  747.  
  748. /**
  749. * Node type: Root node
  750. * @see StringParser_Node::_type
  751. */
  752. define ('STRINGPARSER_NODE_ROOT', 1);
  753.  
  754. /**
  755. * Node type: Text node
  756. * @see StringParser_Node::_type
  757. */
  758. define ('STRINGPARSER_NODE_TEXT', 2);
  759.  
  760. /**
  761. * Global value that is a counter of string parser node ids. Compare it to a
  762. * sequence in databases.
  763. * @var int
  764. */
  765. $GLOBALS['__STRINGPARSER_NODE_ID'] = 0;
  766.  
  767. /**
  768. * Generic string parser node class
  769. *
  770. * This is an abstract class for any type of node that is used within the
  771. * string parser. General warning: This class contains code regarding references
  772. * that is very tricky. Please do not touch this code unless you exactly know
  773. * what you are doing. Incorrect handling of references may cause PHP to crash
  774. * with a segmentation fault! You have been warned.
  775. *
  776. * @package stringparser
  777. */
  778. class StringParser_Node {
  779. /**
  780. * The type of this node.
  781. *
  782. * There are three standard node types: root node, text node and unknown
  783. * node. All node types are integer constants. Any node type of a
  784. * subclass must be at least 32 to allow future developments.
  785. *
  786. * @access protected
  787. * @var int
  788. * @see STRINGPARSER_NODE_ROOT, STRINGPARSER_NODE_TEXT
  789. * @see STRINGPARSER_NODE_UNKNOWN
  790. */
  791. var $_type = STRINGPARSER_NODE_UNKNOWN;
  792. /**
  793. * The node ID
  794. *
  795. * This ID uniquely identifies this node. This is needed when searching
  796. * for a specific node in the children array. Please note that this is
  797. * only an internal variable and should never be used - not even in
  798. * subclasses and especially not in external data structures. This ID
  799. * has nothing to do with any type of ID in HTML or XML.
  800. *
  801. * @access protected
  802. * @var int
  803. * @see StringParser_Node::_children
  804. */
  805. var $_id = -1;
  806. /**
  807. * The parent of this node.
  808. *
  809. * It is either null (root node) or a reference to the parent object.
  810. *
  811. * @access protected
  812. * @var mixed
  813. * @see StringParser_Node::_children
  814. */
  815. var $_parent = null;
  816. /**
  817. * The children of this node.
  818. *
  819. * It contains an array of references to all the children nodes of this
  820. * node.
  821. *
  822. * @access protected
  823. * @var array
  824. * @see StringParser_Node::_parent
  825. */
  826. var $_children = array ();
  827. /**
  828. * Occurred at
  829. *
  830. * This defines the position in the parsed text where this node occurred
  831. * at. If -1, this value was not possible to be determined.
  832. *
  833. * @access public
  834. * @var int
  835. */
  836. var $occurredAt = -1;
  837. /**
  838. * Constructor
  839. *
  840. * Currently, the constructor only allocates a new ID for the node and
  841. * assigns it.
  842. *
  843. * @access public
  844. * @param int $occurredAt The position in the text where this node
  845. * occurred at. If not determinable, it is -1.
  846. * @global __STRINGPARSER_NODE_ID
  847. */
  848. function StringParser_Node ($occurredAt = -1) {
  849. $this->_id = $GLOBALS['__STRINGPARSER_NODE_ID']++;
  850. $this->occurredAt = $occurredAt;
  851. }
  852. /**
  853. * Type of the node
  854. *
  855. * This function returns the type of the node
  856. *
  857. * @access public
  858. * @return int
  859. */
  860. function type () {
  861. return $this->_type;
  862. }
  863. /**
  864. * Prepend a node
  865. *
  866. * @access public
  867. * @param object $node The node to be prepended.
  868. * @return bool On success, the function returns true, else false.
  869. */
  870. function prependChild (&$node) {
  871. if (!is_object ($node)) {
  872. return false;
  873. }
  874. // root nodes may not be children of other nodes!
  875. if ($node->_type == STRINGPARSER_NODE_ROOT) {
  876. return false;
  877. }
  878. // if node already has a parent
  879. if ($node->_parent !== false) {
  880. // remove node from there
  881. $parent =& $node->_parent;
  882. if (!$parent->removeChild ($node, false)) {
  883. return false;
  884. }
  885. unset ($parent);
  886. }
  887. $index = count ($this->_children) - 1;
  888. // move all nodes to a new index
  889. while ($index >= 0) {
  890. // save object
  891. $object =& $this->_children[$index];
  892. // we have to unset it because else it will be
  893. // overridden in in the loop
  894. unset ($this->_children[$index]);
  895. // put object to new position
  896. $this->_children[$index+1] =& $object;
  897. $index--;
  898. }
  899. $this->_children[0] =& $node;
  900. return true;
  901. }
  902. /**
  903. * Append text to last text child
  904. * @access public
  905. * @param string $text The text to append
  906. * @return bool On success, the function returns true, else false
  907. */
  908. function appendToLastTextChild ($text) {
  909. $ccount = count ($this->_children);
  910. if ($ccount == 0 || $this->_children[$ccount-1]->_type != STRINGPARSER_NODE_TEXT) {
  911. $ntextnode =& new StringParser_Node_Text ($text);
  912. return $this->appendChild ($ntextnode);
  913. } else {
  914. $this->_children[$ccount-1]->appendText ($text);
  915. return true;
  916. }
  917. }
  918. /**
  919. * Append a node to the children
  920. *
  921. * This function appends a node to the children array(). It
  922. * automatically sets the {@link StrinParser_Node::_parent _parent}
  923. * property of the node that is to be appended.
  924. *
  925. * @access public
  926. * @param object $node The node that is to be appended.
  927. * @return bool On success, the function returns true, else false.
  928. */
  929. function appendChild (&$node) {
  930. if (!is_object ($node)) {
  931. return false;
  932. }
  933. // root nodes may not be children of other nodes!
  934. if ($node->_type == STRINGPARSER_NODE_ROOT) {
  935. return false;
  936. }
  937. // if node already has a parent
  938. if ($node->_parent !== null) {
  939. // remove node from there
  940. $parent =& $node->_parent;
  941. if (!$parent->removeChild ($node, false)) {
  942. return false;
  943. }
  944. unset ($parent);
  945. }
  946. // append it to current node
  947. $new_index = count ($this->_children);
  948. $this->_children[$new_index] =& $node;
  949. $node->_parent =& $this;
  950. return true;
  951. }
  952. /**
  953. * Insert a node before another node
  954. *
  955. * @access public
  956. * @param object $node The node to be inserted.
  957. * @param object $reference The reference node where the new node is
  958. * to be inserted before.
  959. * @return bool On success, the function returns true, else false.
  960. */
  961. function insertChildBefore (&$node, &$reference) {
  962. if (!is_object ($node)) {
  963. return false;
  964. }
  965. // root nodes may not be children of other nodes!
  966. if ($node->_type == STRINGPARSER_NODE_ROOT) {
  967. return false;
  968. }
  969. // is the reference node a child?
  970. $child = $this->_findChild ($reference);
  971. if ($child === false) {
  972. return false;
  973. }
  974. // if node already has a parent
  975. if ($node->_parent !== null) {
  976. // remove node from there
  977. $parent =& $node->_parent;
  978. if (!$parent->removeChild ($node, false)) {
  979. return false;
  980. }
  981. unset ($parent);
  982. }
  983. $index = count ($this->_children) - 1;
  984. // move all nodes to a new index
  985. while ($index >= $child) {
  986. // save object
  987. $object =& $this->_children[$index];
  988. // we have to unset it because else it will be
  989. // overridden in in the loop
  990. unset ($this->_children[$index]);
  991. // put object to new position
  992. $this->_children[$index+1] =& $object;
  993. $index--;
  994. }
  995. $this->_children[$child] =& $node;
  996. return true;
  997. }
  998. /**
  999. * Insert a node after another node
  1000. *
  1001. * @access public
  1002. * @param object $node The node to be inserted.
  1003. * @param object $reference The reference node where the new node is
  1004. * to be inserted after.
  1005. * @return bool On success, the function returns true, else false.
  1006. */
  1007. function insertChildAfter (&$node, &$reference) {
  1008. if (!is_object ($node)) {
  1009. return false;
  1010. }
  1011. // root nodes may not be children of other nodes!
  1012. if ($node->_type == STRINGPARSER_NODE_ROOT) {
  1013. return false;
  1014. }
  1015. // is the reference node a child?
  1016. $child = $this->_findChild ($reference);
  1017. if ($child === false) {
  1018. return false;
  1019. }
  1020. // if node already has a parent
  1021. if ($node->_parent !== false) {
  1022. // remove node from there
  1023. $parent =& $node->_parent;
  1024. if (!$parent->removeChild ($node, false)) {
  1025. return false;
  1026. }
  1027. unset ($parent);
  1028. }
  1029. $index = count ($this->_children) - 1;
  1030. // move all nodes to a new index
  1031. while ($index >= $child + 1) {
  1032. // save object
  1033. $object =& $this->_children[$index];
  1034. // we have to unset it because else it will be
  1035. // overridden in in the loop
  1036. unset ($this->_children[$index]);
  1037. // put object to new position
  1038. $this->_children[$index+1] =& $object;
  1039. $index--;
  1040. }
  1041. $this->_children[$child + 1] =& $node;
  1042. return true;
  1043. }
  1044. /**
  1045. * Remove a child node
  1046. *
  1047. * This function removes a child from the children array. A parameter
  1048. * tells the function whether to destroy the child afterwards or not.
  1049. * If the specified node is not a child of this node, the function will
  1050. * return false.
  1051. *
  1052. * @access public
  1053. * @param mixed $child The child to destroy; either an integer
  1054. * specifying the index of the child or a reference
  1055. * to the child itself.
  1056. * @param bool $destroy Destroy the child afterwards.
  1057. * @return bool On success, the function returns true, else false.
  1058. */
  function removeChild (&$child, $destroy = false) {
      if (is_object ($child)) {
          // a node was passed: remember it and look up its index
          $object =& $child;
          unset ($child);
          $child = $this->_findChild ($object);
          if ($child === false) {
              return false;
          }
      } else {
          // an index was passed: copy it into a fresh local variable so
          // that the caller's variable is no longer referenced
          $save = $child;
          unset ($child);
          $child = $save;
          // only integers are valid indices; check the type before the
          // value is used as an array offset
          if (!is_int ($child) || !isset ($this->_children[$child])) {
              return false;
          }
          $object =& $this->_children[$child];
      }
      // store count for later use
      $ccount = count ($this->_children);
      // index out of bounds
      if ($child < 0 || $child >= $ccount) {
          return false;
      }
      // inconsistency: the child does not regard this node as its parent
      if ($this->_children[$child]->_parent === null ||
          $this->_children[$child]->_parent->_id != $this->_id) {
          return false;
      }
      // $object->_parent = null would equal to $this = null
      // as $object->_parent is a reference to $this!
      // because of this, we have to unset the variable to remove
      // the reference and then redeclare the variable
      unset ($object->_parent);
      $object->_parent = null;
      // free the slot; otherwise it would be overwritten in the loop
      unset ($this->_children[$child]);
      // close the gap: move all following children one index down
      while ($child < $ccount - 1) {
          $obj =& $this->_children[$child + 1];
          // unset the old slot first, otherwise it would be overwritten
          // in the next iteration
          unset ($this->_children[$child + 1]);
          $this->_children[$child] =& $obj;
          // drop the local alias before it is rebound
          unset ($obj);
          $child++;
      }
      if ($destroy) {
          // the node is detached now, so destroyNode will not call back
          // into removeChild for it
          return StringParser_Node::destroyNode ($object);
      }
      return true;
  }
  1117. /**
  1118. * Get the first child of this node
  1119. *
  1120. * @access public
  1121. * @return mixed
  1122. */
  function &firstChild () {
      // hand out a reference to the child at index 0 if there is one
      if (count ($this->_children)) {
          return $this->_children[0];
      }
      // a function returning by reference needs a variable to return,
      // even for the "no children" result
      $none = null;
      return $none;
  }
  1130. /**
  1131. * Get the last child of this node
  1132. *
  1133. * @access public
  1134. * @return mixed
  1135. */
  function &lastChild () {
      $total = count ($this->_children);
      // hand out a reference to the child at the highest index if any
      if ($total) {
          return $this->_children[$total - 1];
      }
      // a function returning by reference needs a variable to return,
      // even for the "no children" result
      $none = null;
      return $none;
  }
  1144. /**
  1145. * Destroy a node
  1146. *
  1147. * @access public
  1148. * @static
  1149. * @param object $node The node to destroy
  1150. * @return bool True on success, else false.
  1151. */
  function destroyNode (&$node) {
      // nothing to destroy
      if ($node === null) {
          return false;
      }
      // a node that still hangs in a tree is removed from its parent;
      // removeChild (with $destroy = true) calls destroyNode again for the
      // detached node, so its result is returned directly
      if ($node->_parent !== null) {
          $owner =& $node->_parent;
          return $owner->removeChild ($node, true);
      }
      // destroy the children one by one, always taking the first one,
      // until none are left
      while (count ($node->_children)) {
          // removeChild takes its argument by reference, so the index has
          // to live in a variable
          $first = 0;
          $removed = $node->removeChild ($first, true);
          if (!$removed) {
              return false;
          }
          unset ($first);
      }
      // give the node the chance to clean up after itself
      if ($node->_destroy ()) {
          // drop our reference and hope that nobody else holds one
          unset ($node);
          return true;
      }
      return false;
  }
  1181. /**
  1182. * Destroy this node
  1183. *
  1184. *
  1185. * @access protected
  1186. * @return bool True on success, else false.
  1187. */
  function _destroy () {
      // nothing to clean up here, so destruction always succeeds
      $destroyed = true;
      return $destroyed;
  }
  1191. /**
  1192. * Find a child node
  1193. *
  1194. * This function searches for a node in the own children and returns
  1195. * the index of the node or false if the node is not a child of this
  1196. * node.
  1197. *
  1198. * @access protected
  1199. * @param mixed $child The node to look for.
  1200. * @return mixed The index of the child node on success, else false.
  1201. */
  function _findChild (&$child) {
      // only objects can be children
      if (is_object ($child)) {
          $total = count ($this->_children);
          $pos = 0;
          // children are identified by their node id
          while ($pos < $total) {
              if ($this->_children[$pos]->_id == $child->_id) {
                  return $pos;
              }
              $pos++;
          }
      }
      return false;
  }
  1214. /**
  1215. * Checks equality of this node and another node
  1216. *
  1217. * @access public
  1218. * @param mixed $node The node to be compared with
  1219. * @return bool True if the other node equals to this node, else false.
  1220. */
  function equals (&$node) {
      // two nodes are considered equal if they carry the same node id
      $sameId = ($this->_id == $node->_id);
      return $sameId;
  }
  1224. /**
  1225. * Determines whether a criterium matches this node
  1226. *
  1227. * @access public
  1228. * @param string $criterium The criterium that is to be checked
  1229. * @param mixed $value The value that is to be compared
  1230. * @return bool True if this node matches that criterium
  1231. */
  function matchesCriterium ($criterium, $value) {
      // this implementation does not evaluate any criterium: both
      // arguments are ignored and the answer is always "no match"
      $matches = false;
      return $matches;
  }
  1235. /**
  1236. * Search for nodes with a certain criterium
  1237. *
  1238. * This may be used to implement getElementsByTagName etc.
  1239. *
  1240. * @access public
  1241. * @param string $criterium The criterium that is to be checked
  1242. * @param mixed $value The value that is to be compared
  1243. * @return array All subnodes that match this criterium
  1244. */
  function &getNodesByCriterium ($criterium, $value) {
      $matches = array ();
      $pos = 0;
      while ($pos < count ($this->_children)) {
          // the child itself comes first ...
          if ($this->_children[$pos]->matchesCriterium ($criterium, $value)) {
              $matches[] =& $this->_children[$pos];
          }
          // ... followed by all matching nodes below it
          $found = $this->_children[$pos]->getNodesByCriterium ($criterium, $value);
          $foundTotal = count ($found);
          for ($k = 0; $k < $foundTotal; $k++) {
              // move the reference over into the result list
              $matches[] =& $found[$k];
              unset ($found[$k]);
          }
          unset ($found);
          $pos++;
      }
      return $matches;
  }
  1264. /**
  1265. * Search for nodes with a certain criterium and return the count
  1266. *
  1267. * Similar to getNodesByCriterium
  1268. *
  1269. * @access public
  1270. * @param string $criterium The criterium that is to be checked
  1271. * @param mixed $value The value that is to be compared
  1272. * @return int The number of subnodes that match this criterium
  1273. */
  function getNodeCountByCriterium ($criterium, $value) {
      $total = 0;
      $pos = 0;
      while ($pos < count ($this->_children)) {
          // count the child itself if it matches ...
          if ($this->_children[$pos]->matchesCriterium ($criterium, $value)) {
              $total += 1;
          }
          // ... and add the matches found below it
          $total += $this->_children[$pos]->getNodeCountByCriterium ($criterium, $value);
          $pos++;
      }
      return $total;
  }
  1285. /**
  1286. * Dump nodes
  1287. *
  1288. * This dumps a tree of nodes
  1289. *
  1290. * @access public
  1291. * @param string $prefix The prefix that is to be used for indentation
  1292. * @param string $linesep The line separator
  1293. * @param int $level The initial level of indentation
  1294. * @return string
  1295. */
  function dump ($prefix = " ", $linesep = "\n", $level = 0) {
      // one line for this node: indentation, node id and description
      $indent = str_repeat ($prefix, $level);
      $out = $indent . $this->_id . ": " . $this->_dumpToString () . $linesep;
      // followed by the dumps of all children, one level deeper
      $pos = 0;
      while ($pos < count ($this->_children)) {
          $out .= $this->_children[$pos]->dump ($prefix, $linesep, $level + 1);
          $pos++;
      }
      return $out;
  }
  1303. /**
  1304. * Dump this node to a string
  1305. *
  1306. * @access protected
  1307. * @return string
  1308. */
  function _dumpToString () {
      // root nodes get a readable label, every other node is described by
      // its numeric type
      $isRoot = ($this->_type == STRINGPARSER_NODE_ROOT);
      return $isRoot ? "root" : (string)$this->_type;
  }
  1315. }
  1316.  
  1317. /**
  1318. * String parser root node class
  1319. *
  1320. * @package stringparser
  1321. */
  class StringParser_Node_Root extends StringParser_Node {
      /**
       * Node type marker.
       *
       * Set to the root type; StringParser_Node refuses to insert nodes
       * of this type as children of other nodes.
       *
       * @access protected
       * @var int
       * @see STRINGPARSER_NODE_ROOT
       */
      var $_type = STRINGPARSER_NODE_ROOT;
  }
  1334.  
  1335. /**
  1336. * String parser text node class
  1337. *
  1338. * @package stringparser
  1339. */
  class StringParser_Node_Text extends StringParser_Node {
      /**
       * The type of this node.
       *
       * This node is a text node.
       *
       * @access protected
       * @var int
       * @see STRINGPARSER_NODE_TEXT
       */
      var $_type = STRINGPARSER_NODE_TEXT;

      /**
       * Node flags, indexed by flag name
       *
       * @access protected
       * @var array
       */
      var $_flags = array ();

      /**
       * The content of this node
       *
       * @access public
       * @var string
       */
      var $content = '';

      /**
       * Constructor
       *
       * PHP 8 no longer treats a method named like its class as a
       * constructor, so this unified constructor is needed for
       * "new StringParser_Node_Text (...)" to initialise the node. It only
       * delegates to the class-named method below.
       *
       * @access public
       * @param string $content The initial content of this element
       * @param int $occurredAt The position in the text where this node
       *                        occurred at. If not determinable, it is -1.
       * @see StringParser_Node_Text::content
       */
      function __construct ($content, $occurredAt = -1) {
          $this->StringParser_Node_Text ($content, $occurredAt);
      }

      /**
       * Class-named (PHP 4 style) constructor
       *
       * Kept so that PHP 4 and code calling this method by name (e.g.
       * parent::StringParser_Node_Text (...)) continue to work.
       *
       * @access public
       * @param string $content The initial content of this element
       * @param int $occurredAt The position in the text where this node
       *                        occurred at. If not determinable, it is -1.
       * @see StringParser_Node_Text::content
       */
      function StringParser_Node_Text ($content, $occurredAt = -1) {
          parent::StringParser_Node ($occurredAt);
          $this->content = $content;
      }

      /**
       * Append text to content
       *
       * @access public
       * @param string $text The text to append
       * @see StringParser_Node_Text::content
       */
      function appendText ($text) {
          $this->content .= $text;
      }

      /**
       * Set a flag
       *
       * An existing flag of the same name is overwritten.
       *
       * @access public
       * @param string $name The name of the flag
       * @param mixed $value The value of the flag
       * @return bool Always true.
       */
      function setFlag ($name, $value) {
          $this->_flags[$name] = $value;
          return true;
      }

      /**
       * Get Flag
       *
       * @access public
       * @param string $flag The requested flag
       * @param string $type The requested type of the return value; with
       *                     'mixed' the stored value is returned as is,
       *                     any other value is passed on to settype ().
       * @param mixed $default The default return value
       * @return mixed The flag value, or $default if the flag is not set
       *               (a flag whose value is null counts as not set).
       */
      function getFlag ($flag, $type = 'mixed', $default = null) {
          if (!isset ($this->_flags[$flag])) {
              return $default;
          }
          $return = $this->_flags[$flag];
          if ($type != 'mixed') {
              // convert the copy only, the stored flag stays untouched
              settype ($return, $type);
          }
          return $return;
      }

      /**
       * Dump this node to a string
       *
       * The description consists of the first 40 characters of the
       * content and the names of all flags, both with whitespace runs
       * collapsed to single spaces.
       *
       * @access protected
       * @return string
       */
      function _dumpToString () {
          return "text \"".substr (preg_replace ('/\s+/', ' ', $this->content), 0, 40)."\" [f:".preg_replace ('/\s+/', ' ', join(':', array_keys ($this->_flags)))."]";
      }
  }
  1423.  
  1424. ?>

Documentation generated on Mon, 24 Apr 2006 10:18:34 +0200 by phpDocumentor 1.3.0RC5