blob: 64af7afd4ae233419c9a4c6b433e82f328bcd1ea [file] [log] [blame]
The Android Open Source Project54b6cfa2008-10-21 07:00:00 -07001#include "XMLHandler.h"
2
3#include <expat.h>
4#include <stdio.h>
5#include <string.h>
6#include <fcntl.h>
7#include <unistd.h>
8#include <errno.h>
9
10#define NS_SEPARATOR 1
11#define MORE_INDENT " "
12
// Returns a copy of s in which the characters '<', '>' and '&' are replaced
// by the entities "&lt;", "&gt;" and "&amp;".  All other characters are
// copied through unchanged.
static string
xml_text_escape(const string& s)
{
    string escaped;
    for (string::const_iterator it = s.begin(); it != s.end(); ++it) {
        const char ch = *it;
        if (ch == '<') {
            escaped += "&lt;";
        } else if (ch == '>') {
            escaped += "&gt;";
        } else if (ch == '&') {
            escaped += "&amp;";
        } else {
            escaped += ch;
        }
    }
    return escaped;
}
37
// Returns a copy of s that is safe to place between double quotes as an XML
// attribute value.
//
// Besides the double quote itself, '&' and '<' are escaped: both are
// forbidden as literal characters inside an attribute value, so leaving them
// untouched would produce a document that cannot be parsed again.
static string
xml_attr_escape(const string& s)
{
    string result;
    const size_t N = s.length();
    for (size_t i=0; i<N; i++) {
        char c = s[i];
        switch (c) {
            case '\"':
                result += "&quot;";
                break;
            case '&':
                result += "&amp;";
                break;
            case '<':
                result += "&lt;";
                break;
            default:
                result += c;
                break;
        }
    }
    return result;
}
56
57XMLNamespaceMap::XMLNamespaceMap()
58{
59}
60
61XMLNamespaceMap::XMLNamespaceMap(char const*const* nspaces)
62
63{
64 while (*nspaces) {
65 m_map[nspaces[1]] = nspaces[0];
66 nspaces += 2;
67 }
68}
69
70string
71XMLNamespaceMap::Get(const string& ns) const
72{
73 if (ns == "xml") {
74 return ns;
75 }
76 map<string,string>::const_iterator it = m_map.find(ns);
77 if (it == m_map.end()) {
78 return "";
79 } else {
80 return it->second;
81 }
82}
83
84string
85XMLNamespaceMap::GetPrefix(const string& ns) const
86{
87 if (ns == "") {
88 return "";
89 }
90 map<string,string>::const_iterator it = m_map.find(ns);
91 if (it != m_map.end()) {
92 if (it->second == "") {
93 return "";
94 } else {
95 return it->second + ":";
96 }
97 } else {
98 return ":"; // invalid
99 }
100}
101
102void
103XMLNamespaceMap::AddToAttributes(vector<XMLAttribute>* attrs) const
104{
105 map<string,string>::const_iterator it;
106 for (it=m_map.begin(); it!=m_map.end(); it++) {
107 if (it->second == "xml") {
108 continue;
109 }
110 XMLAttribute attr;
111 if (it->second == "") {
112 attr.name = "xmlns";
113 } else {
114 attr.name = "xmlns:";
115 attr.name += it->second;
116 }
117 attr.value = it->first;
118 attrs->push_back(attr);
119 }
120}
121
122XMLAttribute::XMLAttribute()
123{
124}
125
126XMLAttribute::XMLAttribute(const XMLAttribute& that)
127 :ns(that.ns),
128 name(that.name),
129 value(that.value)
130{
131}
132
133XMLAttribute::XMLAttribute(string n, string na, string v)
134 :ns(n),
135 name(na),
136 value(v)
137{
138}
139
140XMLAttribute::~XMLAttribute()
141{
142}
143
144int
145XMLAttribute::Compare(const XMLAttribute& that) const
146{
147 if (ns != that.ns) {
148 return ns < that.ns ? -1 : 1;
149 }
150 if (name != that.name) {
151 return name < that.name ? -1 : 1;
152 }
153 return 0;
154}
155
156string
157XMLAttribute::Find(const vector<XMLAttribute>& list, const string& ns, const string& name,
158 const string& def)
159{
160 const size_t N = list.size();
161 for (size_t i=0; i<N; i++) {
162 const XMLAttribute& attr = list[i];
163 if (attr.ns == ns && attr.name == name) {
164 return attr.value;
165 }
166 }
167 return def;
168}
169
170struct xml_handler_data {
171 vector<XMLHandler*> stack;
172 XML_Parser parser;
173 vector<vector<XMLAttribute>*> attributes;
174 string filename;
175};
176
177XMLNode::XMLNode()
178{
179}
180
181XMLNode::~XMLNode()
182{
183// for_each(m_children.begin(), m_children.end(), delete_object<XMLNode>);
184}
185
186XMLNode*
187XMLNode::Clone() const
188{
189 switch (m_type) {
190 case ELEMENT: {
191 XMLNode* e = XMLNode::NewElement(m_pos, m_ns, m_name, m_attrs, m_pretty);
192 const size_t N = m_children.size();
193 for (size_t i=0; i<N; i++) {
194 e->m_children.push_back(m_children[i]->Clone());
195 }
196 return e;
197 }
198 case TEXT: {
199 return XMLNode::NewText(m_pos, m_text, m_pretty);
200 }
201 default:
202 return NULL;
203 }
204}
205
206XMLNode*
207XMLNode::NewElement(const SourcePos& pos, const string& ns, const string& name,
208 const vector<XMLAttribute>& attrs, int pretty)
209{
210 XMLNode* node = new XMLNode();
211 node->m_type = ELEMENT;
212 node->m_pretty = pretty;
213 node->m_pos = pos;
214 node->m_ns = ns;
215 node->m_name = name;
216 node->m_attrs = attrs;
217 return node;
218}
219
220XMLNode*
221XMLNode::NewText(const SourcePos& pos, const string& text, int pretty)
222{
223 XMLNode* node = new XMLNode();
224 node->m_type = TEXT;
225 node->m_pretty = pretty;
226 node->m_pos = pos;
227 node->m_text = text;
228 return node;
229}
230
231void
232XMLNode::SetPrettyRecursive(int value)
233{
234 m_pretty = value;
235 const size_t N = m_children.size();
236 for (size_t i=0; i<N; i++) {
237 m_children[i]->SetPrettyRecursive(value);
238 }
239}
240
241string
242XMLNode::ContentsToString(const XMLNamespaceMap& nspaces) const
243{
244 return contents_to_string(nspaces, "");
245}
246
247string
248XMLNode::ToString(const XMLNamespaceMap& nspaces) const
249{
250 return to_string(nspaces, "");
251}
252
253string
254XMLNode::OpenTagToString(const XMLNamespaceMap& nspaces, int pretty) const
255{
256 return open_tag_to_string(nspaces, "", pretty);
257}
258
259string
260XMLNode::contents_to_string(const XMLNamespaceMap& nspaces, const string& indent) const
261{
262 string result;
263 const size_t N = m_children.size();
264 for (size_t i=0; i<N; i++) {
265 const XMLNode* child = m_children[i];
266 switch (child->Type()) {
267 case ELEMENT:
268 if (m_pretty == PRETTY) {
269 result += '\n';
270 result += indent;
271 }
272 case TEXT:
273 result += child->to_string(nspaces, indent);
274 break;
275 }
276 }
277 return result;
278}
279
// Returns str with leading and trailing whitespace (as classified by
// isspace) removed.  A string that is empty or consists only of whitespace
// yields the empty string.
string
trim_string(const string& str)
{
    size_t begin = 0;
    size_t end = str.length();

    // isspace() is only defined for values representable as unsigned char
    // (or EOF), so plain chars must be converted before the call; otherwise
    // bytes >= 0x80 are passed as negative values.
    while (begin < end && isspace(static_cast<unsigned char>(str[begin]))) {
        begin++;
    }
    while (end > begin && isspace(static_cast<unsigned char>(str[end-1]))) {
        end--;
    }
    return str.substr(begin, end-begin);
}
294
295string
296XMLNode::open_tag_to_string(const XMLNamespaceMap& nspaces, const string& indent, int pretty) const
297{
298 if (m_type != ELEMENT) {
299 return "";
300 }
301 string result = "<";
302 result += nspaces.GetPrefix(m_ns);
303 result += m_name;
304
305 vector<XMLAttribute> attrs = m_attrs;
306
307 sort(attrs.begin(), attrs.end());
308
309 const size_t N = attrs.size();
310 for (size_t i=0; i<N; i++) {
311 const XMLAttribute& attr = attrs[i];
312 if (i == 0 || m_pretty == EXACT || pretty == EXACT) {
313 result += ' ';
314 }
315 else {
316 result += "\n";
317 result += indent;
318 result += MORE_INDENT;
319 result += MORE_INDENT;
320 }
321 result += nspaces.GetPrefix(attr.ns);
322 result += attr.name;
323 result += "=\"";
324 result += xml_attr_escape(attr.value);
325 result += '\"';
326 }
327
328 if (m_children.size() > 0) {
329 result += '>';
330 } else {
331 result += " />";
332 }
333 return result;
334}
335
336string
337XMLNode::to_string(const XMLNamespaceMap& nspaces, const string& indent) const
338{
339 switch (m_type)
340 {
341 case TEXT: {
342 if (m_pretty == EXACT) {
343 return xml_text_escape(m_text);
344 } else {
345 return xml_text_escape(trim_string(m_text));
346 }
347 }
348 case ELEMENT: {
349 string result = open_tag_to_string(nspaces, indent, PRETTY);
350
351 if (m_children.size() > 0) {
352 result += contents_to_string(nspaces, indent + MORE_INDENT);
353
354 if (m_pretty == PRETTY && m_children.size() > 0) {
355 result += '\n';
356 result += indent;
357 }
358
359 result += "</";
360 result += nspaces.GetPrefix(m_ns);
361 result += m_name;
362 result += '>';
363 }
364 return result;
365 }
366 default:
367 return "";
368 }
369}
370
371string
372XMLNode::CollapseTextContents() const
373{
374 if (m_type == TEXT) {
375 return m_text;
376 }
377 else if (m_type == ELEMENT) {
378 string result;
379
380 const size_t N=m_children.size();
381 for (size_t i=0; i<N; i++) {
382 result += m_children[i]->CollapseTextContents();
383 }
384
385 return result;
386 }
387 else {
388 return "";
389 }
390}
391
392vector<XMLNode*>
393XMLNode::GetElementsByName(const string& ns, const string& name) const
394{
395 vector<XMLNode*> result;
396 const size_t N=m_children.size();
397 for (size_t i=0; i<N; i++) {
398 XMLNode* child = m_children[i];
399 if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
400 result.push_back(child);
401 }
402 }
403 return result;
404}
405
406XMLNode*
407XMLNode::GetElementByNameAt(const string& ns, const string& name, size_t index) const
408{
409 vector<XMLNode*> result;
410 const size_t N=m_children.size();
411 for (size_t i=0; i<N; i++) {
412 XMLNode* child = m_children[i];
413 if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
414 if (index == 0) {
415 return child;
416 } else {
417 index--;
418 }
419 }
420 }
421 return NULL;
422}
423
424size_t
425XMLNode::CountElementsByName(const string& ns, const string& name) const
426{
427 size_t result = 0;
428 const size_t N=m_children.size();
429 for (size_t i=0; i<N; i++) {
430 XMLNode* child = m_children[i];
431 if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
432 result++;
433 }
434 }
435 return result;
436}
437
438string
439XMLNode::GetAttribute(const string& ns, const string& name, const string& def) const
440{
441 return XMLAttribute::Find(m_attrs, ns, name, def);
442}
443
444static void
445parse_namespace(const char* data, string* ns, string* name)
446{
447 const char* p = strchr(data, NS_SEPARATOR);
448 if (p != NULL) {
449 ns->assign(data, p-data);
450 name->assign(p+1);
451 } else {
452 ns->assign("");
453 name->assign(data);
454 }
455}
456
457static void
458convert_attrs(const char** in, vector<XMLAttribute>* out)
459{
460 while (*in) {
461 XMLAttribute attr;
462 parse_namespace(in[0], &attr.ns, &attr.name);
463 attr.value = in[1];
464 out->push_back(attr);
465 in += 2;
466 }
467}
468
469static bool
470list_contains(const vector<XMLHandler*>& stack, XMLHandler* handler)
471{
472 const size_t N = stack.size();
473 for (size_t i=0; i<N; i++) {
474 if (stack[i] == handler) {
475 return true;
476 }
477 }
478 return false;
479}
480
481static void XMLCALL
482start_element_handler(void *userData, const char *name, const char **attrs)
483{
484 xml_handler_data* data = (xml_handler_data*)userData;
485
486 XMLHandler* handler = data->stack[data->stack.size()-1];
487
488 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
489 string nsString;
490 string nameString;
491 XMLHandler* next = handler;
492 vector<XMLAttribute> attributes;
493
494 parse_namespace(name, &nsString, &nameString);
495 convert_attrs(attrs, &attributes);
496
497 handler->OnStartElement(pos, nsString, nameString, attributes, &next);
498
499 if (next == NULL) {
500 next = handler;
501 }
502
503 if (next != handler) {
504 next->elementPos = pos;
505 next->elementNamespace = nsString;
506 next->elementName = nameString;
507 next->elementAttributes = attributes;
508 }
509
510 data->stack.push_back(next);
511}
512
513static void XMLCALL
514end_element_handler(void *userData, const char *name)
515{
516 xml_handler_data* data = (xml_handler_data*)userData;
517
518 XMLHandler* handler = data->stack[data->stack.size()-1];
519 data->stack.pop_back();
520
521 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
522
523 if (!list_contains(data->stack, handler)) {
524 handler->OnDone(pos);
525 if (data->stack.size() > 1) {
526 // not top one
527 delete handler;
528 }
529 }
530
531 handler = data->stack[data->stack.size()-1];
532
533 string nsString;
534 string nameString;
535
536 parse_namespace(name, &nsString, &nameString);
537
538 handler->OnEndElement(pos, nsString, nameString);
539}
540
541static void XMLCALL
542text_handler(void *userData, const XML_Char *s, int len)
543{
544 xml_handler_data* data = (xml_handler_data*)userData;
545 XMLHandler* handler = data->stack[data->stack.size()-1];
546 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
547 handler->OnText(pos, string(s, len));
548}
549
550static void XMLCALL
551comment_handler(void *userData, const char *comment)
552{
553 xml_handler_data* data = (xml_handler_data*)userData;
554 XMLHandler* handler = data->stack[data->stack.size()-1];
555 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
556 handler->OnComment(pos, string(comment));
557}
558
559bool
560XMLHandler::ParseFile(const string& filename, XMLHandler* handler)
561{
562 char buf[16384];
563 int fd = open(filename.c_str(), O_RDONLY);
564 if (fd < 0) {
565 SourcePos(filename, -1).Error("Unable to open file for read: %s", strerror(errno));
566 return false;
567 }
568
569 XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR);
570 xml_handler_data state;
571 state.stack.push_back(handler);
572 state.parser = parser;
573 state.filename = filename;
574
575 XML_SetUserData(parser, &state);
576 XML_SetElementHandler(parser, start_element_handler, end_element_handler);
577 XML_SetCharacterDataHandler(parser, text_handler);
578 XML_SetCommentHandler(parser, comment_handler);
579
580 ssize_t len;
581 bool done;
582 do {
583 len = read(fd, buf, sizeof(buf));
584 done = len < (ssize_t)sizeof(buf);
585 if (len < 0) {
586 SourcePos(filename, -1).Error("Error reading file: %s\n", strerror(errno));
587 close(fd);
588 return false;
589 }
590 if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) {
591 SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error(
592 "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
593 close(fd);
594 return false;
595 }
596 } while (!done);
597
598 XML_ParserFree(parser);
599
600 close(fd);
601
602 return true;
603}
604
605bool
606XMLHandler::ParseString(const string& filename, const string& text, XMLHandler* handler)
607{
608 XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR);
609 xml_handler_data state;
610 state.stack.push_back(handler);
611 state.parser = parser;
612 state.filename = filename;
613
614 XML_SetUserData(parser, &state);
615 XML_SetElementHandler(parser, start_element_handler, end_element_handler);
616 XML_SetCharacterDataHandler(parser, text_handler);
617 XML_SetCommentHandler(parser, comment_handler);
618
619 if (XML_Parse(parser, text.c_str(), text.size(), true) == XML_STATUS_ERROR) {
620 SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error(
621 "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
622 return false;
623 }
624
625 XML_ParserFree(parser);
626
627 return true;
628}
629
630XMLHandler::XMLHandler()
631{
632}
633
634XMLHandler::~XMLHandler()
635{
636}
637
638int
639XMLHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
640 const vector<XMLAttribute>& attrs, XMLHandler** next)
641{
642 return 0;
643}
644
645int
646XMLHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
647{
648 return 0;
649}
650
651int
652XMLHandler::OnText(const SourcePos& pos, const string& text)
653{
654 return 0;
655}
656
657int
658XMLHandler::OnComment(const SourcePos& pos, const string& text)
659{
660 return 0;
661}
662
663int
664XMLHandler::OnDone(const SourcePos& pos)
665{
666 return 0;
667}
668
669TopElementHandler::TopElementHandler(const string& ns, const string& name, XMLHandler* next)
670 :m_ns(ns),
671 m_name(name),
672 m_next(next)
673{
674}
675
676int
677TopElementHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
678 const vector<XMLAttribute>& attrs, XMLHandler** next)
679{
680 *next = m_next;
681 return 0;
682}
683
684int
685TopElementHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
686{
687 return 0;
688}
689
690int
691TopElementHandler::OnText(const SourcePos& pos, const string& text)
692{
693 return 0;
694}
695
696int
697TopElementHandler::OnDone(const SourcePos& pos)
698{
699 return 0;
700}
701
702
703NodeHandler::NodeHandler(XMLNode* root, int pretty)
704 :m_root(root),
705 m_pretty(pretty)
706{
707 if (root != NULL) {
708 m_nodes.push_back(root);
709 }
710}
711
712NodeHandler::~NodeHandler()
713{
714}
715
716int
717NodeHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
718 const vector<XMLAttribute>& attrs, XMLHandler** next)
719{
720 int pretty;
721 if (XMLAttribute::Find(attrs, XMLNS_XMLNS, "space", "") == "preserve") {
722 pretty = XMLNode::EXACT;
723 } else {
724 if (m_root == NULL) {
725 pretty = m_pretty;
726 } else {
727 pretty = m_nodes[m_nodes.size()-1]->Pretty();
728 }
729 }
730 XMLNode* n = XMLNode::NewElement(pos, ns, name, attrs, pretty);
731 if (m_root == NULL) {
732 m_root = n;
733 } else {
734 m_nodes[m_nodes.size()-1]->EditChildren().push_back(n);
735 }
736 m_nodes.push_back(n);
737 return 0;
738}
739
740int
741NodeHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
742{
743 m_nodes.pop_back();
744 return 0;
745}
746
747int
748NodeHandler::OnText(const SourcePos& pos, const string& text)
749{
750 if (m_root == NULL) {
751 return 1;
752 }
753 XMLNode* n = XMLNode::NewText(pos, text, m_nodes[m_nodes.size()-1]->Pretty());
754 m_nodes[m_nodes.size()-1]->EditChildren().push_back(n);
755 return 0;
756}
757
758int
759NodeHandler::OnComment(const SourcePos& pos, const string& text)
760{
761 return 0;
762}
763
764int
765NodeHandler::OnDone(const SourcePos& pos)
766{
767 return 0;
768}
769
770XMLNode*
771NodeHandler::ParseFile(const string& filename, int pretty)
772{
773 NodeHandler handler(NULL, pretty);
774 if (!XMLHandler::ParseFile(filename, &handler)) {
775 fprintf(stderr, "error parsing file: %s\n", filename.c_str());
776 return NULL;
777 }
778 return handler.Root();
779}
780
781XMLNode*
782NodeHandler::ParseString(const string& filename, const string& text, int pretty)
783{
784 NodeHandler handler(NULL, pretty);
785 if (!XMLHandler::ParseString(filename, text, &handler)) {
786 fprintf(stderr, "error parsing file: %s\n", filename.c_str());
787 return NULL;
788 }
789 return handler.Root();
790}
791
792