It takes into account array keys with the same name and uses an increment for them instead of overwriting the keys.
I am using it for SOAP requests (20K - 150K) and it parses very fast compared to PEAR.
<?
/**
 * Converts an XML document into a nested PHP array.
 *
 * The document is parsed once, at construction time, with
 * xml_parse_into_struct(). Tag names that occur more than once in a row
 * (two adjacent "complete" elements, or a "close" immediately followed by an
 * "open" of the same tag) are stored under numeric indexes instead of the tag
 * name, so repeated elements do not overwrite each other.
 *
 * The index counter of a repeated tag is shared by the whole document: it
 * keeps counting across different parents and is never reset.
 *
 * Usage:
 *   $p = new XMLParser($xml);
 *   $p->getStatus();  // 'success: ...' or 'error: ...'
 *   $p->getOutput();  // nested array, empty when parsing failed
 */
class XMLParser {
    // raw xml
    private $rawXML;
    // xml parser handle; released (set to null) once parsing has finished
    private $parser = null;
    // arrays filled by xml_parse_into_struct()
    private $valueArray = array();
    private $keyArray = array();
    // tag name => next numeric index, for tags detected as repeated
    private $duplicateKeys = array();
    // return data
    private $output = array();
    private $status;

    /**
     * Stores the XML and parses it immediately.
     *
     * A PHP 5+ constructor is used because a method named after the class is
     * no longer treated as a constructor by PHP 8.
     *
     * @param string $xml the raw XML document
     */
    public function __construct($xml){
        $this->rawXML = $xml;
        $this->parser = xml_parser_create();
        $this->parse();
    }

    /**
     * Parses the raw XML into $this->output and records a status message.
     *
     * @return bool true on success, false when the XML could not be parsed
     */
    private function parse(){
        $parser = $this->parser;
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0); // keep the tag case as written
        xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);   // drop whitespace-only values

        $parsed = xml_parse_into_struct($parser, $this->rawXML, $this->valueArray, $this->keyArray);
        if(!$parsed){
            // The error details must be read before the parser is released.
            $this->status = 'error: '.xml_error_string(xml_get_error_code($parser)).' at line '.xml_get_current_line_number($parser);
        }

        // Release the parser on both the success and the failure path.
        xml_parser_free($parser);
        $this->parser = null;

        if(!$parsed){
            return false;
        }

        $this->findDuplicateKeys();

        // Path of array keys leading to the element currently being visited.
        $stack = array();
        foreach($this->valueArray as $val){
            switch($val['type']){
                case 'open':
                    $stack[] = $this->nextKey($val['tag']);
                    break;
                case 'close':
                    array_pop($stack);
                    break;
                case 'complete':
                    $stack[] = $this->nextKey($val['tag']);
                    // Empty elements (<tag/>) carry no 'value' entry; store null for them.
                    $value = isset($val['value']) ? $val['value'] : null;
                    $this->setArrayValue($this->output, $stack, $value);
                    array_pop($stack);
                    break;
            }
        }

        $this->status = 'success: xml was parsed';
        return true;
    }

    /**
     * Returns the array key to use for a tag: the next numeric index when the
     * tag was detected as repeated, otherwise the tag name itself.
     *
     * @param string $tag
     * @return int|string
     */
    private function nextKey($tag){
        if(array_key_exists($tag, $this->duplicateKeys)){
            return $this->duplicateKeys[$tag]++;
        }
        return $tag;
    }

    /**
     * Registers every tag that appears twice in a row, starting its index
     * counter at 0.
     */
    private function findDuplicateKeys(){
        $total = count($this->valueArray);
        for($i = 0; $i + 1 < $total; $i++){
            $current = $this->valueArray[$i];
            $next = $this->valueArray[$i + 1];
            if($current['tag'] != $next['tag']){
                continue;
            }
            // two complete tags side by side
            $adjacentComplete = $current['type'] == 'complete' && $next['type'] == 'complete';
            // a close tag directly followed by an open tag of the same name
            $reopened = $current['type'] == 'close' && $next['type'] == 'open';
            if($adjacentComplete || $reopened){
                $this->duplicateKeys[$current['tag']] = 0;
            }
        }
    }

    /**
     * Writes $value into $array at the nested position described by $stack,
     * creating intermediate arrays as needed.
     *
     * @param array $array target array, modified in place
     * @param array $stack list of keys, outermost first
     * @param mixed $value value to store
     */
    private function setArrayValue(&$array, $stack, $value){
        $node = &$array;
        foreach($stack as $key){
            $node = &$node[$key];
        }
        $node = $value;
    }

    /**
     * @return array the parsed structure (empty if parsing failed)
     */
    public function getOutput(){
        return $this->output;
    }

    /**
     * @return string status message starting with 'success: ' or 'error: '
     */
    public function getStatus(){
        return $this->status;
    }
}
?>
Usage:
$p = new XMLParser($xml);
$p->getOutput();
Thursday, September 18, 2008
XMLParser
POSTED BY
Oriol
AT
3:00 PM
0
Comments
parsemoldb.php - parses moldb.xml into an array of molecular objects
<?php
/**
 * Plain data holder for one amino acid record.
 */
class AminoAcid {
    public $name;   // aa name
    public $symbol; // three letter symbol
    public $code;   // one letter code
    public $type;   // hydrophobic, charged or neutral

    /**
     * Copies every key/value pair of $aa onto the object as a property.
     *
     * A PHP 5+ constructor is used because a method named after the class is
     * no longer treated as a constructor by PHP 8, which would leave the
     * object unpopulated.
     *
     * @param array $aa property name => value
     */
    public function __construct($aa)
    {
        foreach ($aa as $k => $v) {
            $this->$k = $v;
        }
    }
}
/**
 * Reads an XML file and builds one object per <molecule> element.
 *
 * Each molecule's child entries are handed to parseMol().
 *
 * @param string $filename path of the XML database
 * @return array list of objects returned by parseMol(); empty when the file
 *               cannot be read or contains no <molecule> element
 */
function readDatabase($filename)
{
    // Start with an empty list so the function always returns an array.
    $tdb = array();

    // read the XML database of aminoacids
    $data = file_get_contents($filename);
    if ($data === false) {
        return $tdb;
    }

    $values = array();
    $tags = array();
    $parser = xml_parser_create();
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
    xml_parse_into_struct($parser, $data, $values, $tags);
    xml_parser_free($parser);

    if (!isset($tags['molecule'])) {
        return $tdb;
    }

    // each contiguous pair of array entries are the
    // lower and upper range for each molecule definition
    $molranges = $tags['molecule'];
    $rangeCount = count($molranges);
    // "$i + 1 < $rangeCount" skips a trailing index that has no closing partner.
    for ($i = 0; $i + 1 < $rangeCount; $i += 2) {
        $offset = $molranges[$i] + 1;
        $len = $molranges[$i + 1] - $offset;
        $tdb[] = parseMol(array_slice($values, $offset, $len));
    }

    return $tdb;
}
/**
 * Builds an AminoAcid from the struct entries of one molecule.
 *
 * @param array $mvalues entries produced by xml_parse_into_struct(), each
 *                       with a "tag" key and, when the element has text,
 *                       a "value" key
 * @return AminoAcid object whose properties are the tag => value pairs
 */
function parseMol($mvalues)
{
    // Defined up front so an empty slice still yields a valid (empty) map.
    $mol = array();
    foreach ($mvalues as $entry) {
        // Elements without text have no "value" key; record them as null.
        $mol[$entry["tag"]] = isset($entry["value"]) ? $entry["value"] : null;
    }
    return new AminoAcid($mol);
}
// Load every record from moldb.xml and dump the resulting list.
$db = readDatabase('moldb.xml');
print "** Database of AminoAcid objects:\n";
print_r($db);
?>
POSTED BY
Oriol
AT
3:00 PM
0
Comments
Sunday, September 14, 2008
Get to All The CDATA
<?php
/**
 * Recursively prints the name and character data of every leaf node.
 *
 * Nodes that have a 'children' entry are descended into and print nothing
 * themselves. For every other node the 'name' (followed by ": ") and the
 * 'cdata' (followed by a newline) are echoed when present.
 *
 * The keys are quoted strings: as bare words they are undefined constants,
 * which is a fatal error on PHP 8.
 *
 * @param array $pItem list of nodes, each an array that may contain the keys
 *                     'children', 'name' and 'cdata'
 */
function printCDATA($pItem)
{
    foreach ($pItem as $value) {
        if (isset($value['children'])) {
            printCDATA($value['children']);
            continue;
        }
        if (isset($value['name'])) {
            echo $value['name'] . ": ";
        }
        if (isset($value['cdata'])) {
            echo $value['cdata'] . "\n";
        }
    }
}
// Print the tree held in $stack. NOTE(review): $stack is not defined in this
// snippet; presumably it is built by parser code elsewhere — confirm.
printCDATA($stack);
?>
POSTED BY
Oriol
AT
7:37 PM
0
Comments
xml_parse
This is a follow up to the parser class posted by neoyahuu at yahoo dot com. The xml_set_character_data_handler function falls prey to the weird splitting caused by special characters (i.e. new lines whenever an umlaut is found) - my fix just uses concatenation to stop this from happening. This is a great function otherwise. The code:
<?php
/**
 * Event-based XML reader that collects attributes and text per element path.
 *
 * Results are stored in $data, keyed by the element path with '|' as the
 * separator (for example "root|item"). Each entry holds the element's
 * attributes; its text is collected under the 'data' key. The document is
 * parsed as soon as the object is constructed.
 */
class xx_xml {
    // XML parser variables
    public $parser;
    public $name;
    public $attr;
    public $data = array();
    public $stack = array();
    public $keys;
    public $path;
    // Either 'url' or 'contents': tells how the first constructor argument
    // is to be interpreted.
    public $type;
    // URL to read from, or the XML text itself when $type is 'contents'
    public $url;
    // Index inside ['data'] receiving the text of the element started last;
    // null until the first text chunk of that element arrives.
    public $chunkSlot = null;

    /**
     * Stores the source and parses it immediately.
     *
     * A PHP 5+ constructor is used because a method named after the class is
     * no longer treated as a constructor by PHP 8.
     *
     * @param string $url  URL to open, or the XML text when $type is 'contents'
     * @param string $type 'url' or 'contents'
     */
    public function __construct($url='http://www.opocot.com', $type='url') {
        $this->type = $type;
        $this->url = $url;
        $this->parse();
    }

    /**
     * Parses the XML data, feeding it to the parser in chunks.
     *
     * Any open or parse failure is reported through error(), which ends the
     * script.
     */
    public function parse()
    {
        $data = '';
        $this->parser = xml_parser_create("UTF-8");
        // Handlers are bound as array callables, which removes the need for
        // xml_set_object().
        xml_set_element_handler($this->parser, array($this, 'startXML'), array($this, 'endXML'));
        xml_set_character_data_handler($this->parser, array($this, 'charXML'));
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, false);

        if ($this->type == 'url') {
            // type 'url': open the XML with fopen
            $fp = @fopen($this->url, 'rb');
            if (!$fp) {
                $this->error("Cannot open {$this->url}");
            }
            while (($data = fread($fp, 8192))) {
                if (!xml_parse($this->parser, $data, feof($fp))) {
                    $this->error(sprintf('XML error at line %d column %d',
                        xml_get_current_line_number($this->parser),
                        xml_get_current_column_number($this->parser)));
                }
            }
            fclose($fp);
        } else if ($this->type == 'contents') {
            // type 'contents': the XML text was passed in directly (fetched
            // by the caller with CURL, sockets or any other method). It is
            // fed to the parser line by line, skipping blank lines.
            $lines = explode("\n", $this->url);
            foreach ($lines as $val) {
                if (trim($val) == '') {
                    continue;
                }
                $data = $val . "\n";
                if (!xml_parse($this->parser, $data)) {
                    echo $data . "\n";
                    $this->error(sprintf('XML error at line %d column %d',
                        xml_get_current_line_number($this->parser),
                        xml_get_current_column_number($this->parser)));
                }
            }
        }
    }

    /**
     * Start-tag handler: records the element's attributes under its path.
     *
     * The first element seen at a path stores its attributes directly; later
     * elements at the same path have their attribute arrays appended.
     */
    public function startXML($parser, $name, $attr) {
        $this->stack[$name] = array();
        // Path of the current element: every open tag name joined by the separator.
        $keys = implode('|', array_keys($this->stack));

        if (array_key_exists($keys, $this->data)) {
            $this->data[$keys][] = $attr;
        } else {
            $this->data[$keys] = $attr;
        }
        $this->keys = $keys;
        // A new element begins: its text gets a freshly computed slot.
        $this->chunkSlot = null;
    }

    /**
     * End-tag handler: removes the element from the stack when it is the
     * most recently opened one.
     */
    public function endXML($parser, $name) {
        end($this->stack);
        if (key($this->stack) === $name) {
            array_pop($this->stack);
        }
    }

    /**
     * Character-data handler: appends text to the current element's 'data'.
     *
     * The parser may deliver one text node in several pieces (for example
     * around special characters), so all pieces of an element are
     * concatenated into a single slot. Whitespace-only pieces are ignored.
     */
    public function charXML($parser, $data) {
        if (trim($data) === '') {
            return;
        }
        if ($this->chunkSlot === null) {
            // Choose the slot once per element; recomputing it for every
            // piece would shift it as soon as 'data' has been added.
            $slot = count($this->data[$this->keys]) - 1;
            $this->chunkSlot = $slot < 0 ? 0 : $slot;
        }
        if (!isset($this->data[$this->keys]['data'][$this->chunkSlot])) {
            $this->data[$this->keys]['data'][$this->chunkSlot] = '';
        }
        $this->data[$this->keys]['data'][$this->chunkSlot] .= trim(str_replace("\n", '', $data));
    }

    /**
     * Prints an error message and terminates the script.
     *
     * @param string $msg text to show after "Error: "
     */
    public function error($msg) {
        echo "\nError: $msg\n";
        exit();
    }
}
?>
POSTED BY
Oriol
AT
7:31 PM
0
Comments
Friday, September 12, 2008
Map XML to HTML
// XML document to convert.
$file = "data.xml";
// Upper-case XML tag name => HTML tag name emitted in its place.
// Tags that are not listed here produce no markup.
$map_array = array(
"BOLD" => "B",
"EMPHASIS" => "I",
"LITERAL" => "TT"
);
/**
 * Start-tag handler: echoes the mapped HTML opening tag, if one exists.
 *
 * @param mixed  $parser the XML parser invoking the handler (unused)
 * @param string $name   XML tag name, looked up in the global $map_array
 * @param array  $attrs  attributes of the XML tag (unused)
 */
function startElement($parser, $name, $attrs)
{
    global $map_array;

    if (!isset($map_array[$name])) {
        return;
    }
    echo '<' . $map_array[$name] . '>';
}
/**
 * End-tag handler: echoes the mapped HTML closing tag, if one exists.
 *
 * @param mixed  $parser the XML parser invoking the handler (unused)
 * @param string $name   XML tag name, looked up in the global $map_array
 */
function endElement($parser, $name)
{
    global $map_array;

    if (!isset($map_array[$name])) {
        return;
    }
    echo '</' . $map_array[$name] . '>';
}
/**
 * Character-data handler: echoes element text as HTML-safe output.
 *
 * The parser hands over text with its XML entities already decoded, so the
 * characters that are special in HTML are escaped again before being written
 * into the HTML output.
 *
 * @param mixed  $parser the XML parser invoking the handler (unused)
 * @param string $data   decoded character data
 */
function characterData($parser, $data)
{
    echo htmlspecialchars($data, ENT_QUOTES, 'UTF-8');
}
// Create the parser and attach the three handlers.
$xml_parser = xml_parser_create();
// use case-folding so we are sure to find the tag in $map_array
xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, true);
xml_set_element_handler($xml_parser, "startElement", "endElement");
xml_set_character_data_handler($xml_parser, "characterData");

if (!($fp = fopen($file, "r"))) {
    die( "could not open XML input");
}

// Feed the file to the parser in 4 KB chunks; the third argument tells the
// parser when the last chunk has been reached.
while ($data = fread($fp, 4096)) {
    if (!xml_parse($xml_parser, $data, feof($fp))) {
        die(sprintf( "XML error: %s at line %d",
            xml_error_string(xml_get_error_code($xml_parser)),
            xml_get_current_line_number($xml_parser)));
    }
}

// Release the input file as well as the parser.
fclose($fp);
xml_parser_free($xml_parser);
?>
POSTED BY
Oriol
AT
6:32 AM
0
Comments