PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
xml.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * xml.c
4  * XML data type support.
5  *
6  *
7  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/backend/utils/adt/xml.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 /*
16  * Generally, XML type support is only available when libxml use was
17  * configured during the build. But even if that is not done, the
18  * type and all the functions are available, but most of them will
19  * fail. For one thing, this avoids having to manage variant catalog
20  * installations. But it also has nice effects such as that you can
21  * dump a database containing XML type data even if the server is not
22  * linked with libxml. Thus, make sure xml_out() works even if nothing
23  * else does.
24  */
25 
26 /*
27  * Notes on memory management:
28  *
29  * Sometimes libxml allocates global structures in the hope that it can reuse
30  * them later on. This makes it impractical to change the xmlMemSetup
31  * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32  * allocated with malloc() or vice versa. Since libxml might be used by
33  * loadable modules, eg libperl, our only safe choices are to change the
34  * functions at postmaster/backend launch or not at all. Since we'd rather
35  * not activate libxml in sessions that might never use it, the latter choice
36  * is the preferred one. However, for debugging purposes it can be awfully
37  * handy to constrain libxml's allocations to be done in a specific palloc
38  * context, where they're easy to track. Therefore there is code here that
39  * can be enabled in debug builds to redirect libxml's allocations into a
40  * special context LibxmlContext. It's not recommended to turn this on in
41  * a production build because of the possibility of bad interactions with
42  * external modules.
43  */
44 /* #define USE_LIBXMLCONTEXT */
45 
46 #include "postgres.h"
47 
48 #ifdef USE_LIBXML
49 #include <libxml/chvalid.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/tree.h>
53 #include <libxml/uri.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/xmlversion.h>
56 #include <libxml/xmlwriter.h>
57 #include <libxml/xpath.h>
58 #include <libxml/xpathInternals.h>
59 
60 /*
61  * We used to check for xmlStructuredErrorContext via a configure test; but
62  * that doesn't work on Windows, so instead use this grottier method of
63  * testing the library version number.
64  */
65 #if LIBXML_VERSION >= 20704
66 #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
67 #endif
68 #endif /* USE_LIBXML */
69 
70 #include "access/htup_details.h"
71 #include "catalog/namespace.h"
72 #include "catalog/pg_type.h"
73 #include "commands/dbcommands.h"
74 #include "executor/executor.h"
75 #include "executor/spi.h"
76 #include "fmgr.h"
77 #include "lib/stringinfo.h"
78 #include "libpq/pqformat.h"
79 #include "mb/pg_wchar.h"
80 #include "miscadmin.h"
81 #include "nodes/execnodes.h"
82 #include "nodes/nodeFuncs.h"
83 #include "utils/array.h"
84 #include "utils/builtins.h"
85 #include "utils/date.h"
86 #include "utils/datetime.h"
87 #include "utils/lsyscache.h"
88 #include "utils/memutils.h"
89 #include "utils/rel.h"
90 #include "utils/syscache.h"
91 #include "utils/xml.h"
92 
93 
94 /* GUC variables */
97 
98 #ifdef USE_LIBXML
99 
100 /* random number to identify PgXmlErrorContext */
101 #define ERRCXT_MAGIC 68275028
102 
103 struct PgXmlErrorContext
104 {
105  int magic;
106  /* strictness argument passed to pg_xml_init */
107  PgXmlStrictness strictness;
108  /* current error status and accumulated message, if any */
109  bool err_occurred;
110  StringInfoData err_buf;
111  /* previous libxml error handling state (saved by pg_xml_init) */
112  xmlStructuredErrorFunc saved_errfunc;
113  void *saved_errcxt;
114  /* previous libxml entity handler (saved by pg_xml_init) */
115  xmlExternalEntityLoader saved_entityfunc;
116 };
117 
118 static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
119  xmlParserCtxtPtr ctxt);
120 static void xml_errorHandler(void *data, xmlErrorPtr error);
121 static void xml_ereport_by_code(int level, int sqlcode,
122  const char *msg, int errcode);
123 static void chopStringInfoNewlines(StringInfo str);
124 static void appendStringInfoLineSeparator(StringInfo str);
125 
126 #ifdef USE_LIBXMLCONTEXT
127 
128 static MemoryContext LibxmlContext = NULL;
129 
130 static void xml_memory_init(void);
131 static void *xml_palloc(size_t size);
132 static void *xml_repalloc(void *ptr, size_t size);
133 static void xml_pfree(void *ptr);
134 static char *xml_pstrdup(const char *string);
135 #endif /* USE_LIBXMLCONTEXT */
136 
137 static xmlChar *xml_text2xmlChar(text *in);
138 static int parse_xml_decl(const xmlChar *str, size_t *lenp,
139  xmlChar **version, xmlChar **encoding, int *standalone);
140 static bool print_xml_decl(StringInfo buf, const xmlChar *version,
141  pg_enc encoding, int standalone);
142 static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
143  bool preserve_whitespace, int encoding);
144 static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
145 static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
146  ArrayBuildState **astate);
147 #endif /* USE_LIBXML */
148 
149 static StringInfo query_to_xml_internal(const char *query, char *tablename,
150  const char *xmlschema, bool nulls, bool tableforest,
151  const char *targetns, bool top_level);
152 static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
153  bool nulls, bool tableforest, const char *targetns);
154 static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
155  List *relid_list, bool nulls,
156  bool tableforest, const char *targetns);
157 static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
158  bool nulls, bool tableforest,
159  const char *targetns);
160 static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
161 static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
162 static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
163 static void SPI_sql_row_to_xmlelement(int rownum, StringInfo result,
164  char *tablename, bool nulls, bool tableforest,
165  const char *targetns, bool top_level);
166 
/*
 * Raise the standard "feature not supported" error.  Used on the
 * non-libxml side of the #ifdef USE_LIBXML branches in this file.
 */
#define NO_XML_SUPPORT() \
    ereport(ERROR, \
            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
             errmsg("unsupported XML feature"), \
             errdetail("This functionality requires the server to be built with libxml support."), \
             errhint("You need to rebuild PostgreSQL using --with-libxml.")))


/* namespace URIs, from SQL/XML:2008 section 4.9 */
#define NAMESPACE_XSD       "http://www.w3.org/2001/XMLSchema"
#define NAMESPACE_XSI       "http://www.w3.org/2001/XMLSchema-instance"
#define NAMESPACE_SQLXML    "http://standards.iso.org/iso/9075/2003/sqlxml"
179 
180 
181 #ifdef USE_LIBXML
182 
183 static int
184 xmlChar_to_encoding(const xmlChar *encoding_name)
185 {
186  int encoding = pg_char_to_encoding((const char *) encoding_name);
187 
188  if (encoding < 0)
189  ereport(ERROR,
190  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
191  errmsg("invalid encoding name \"%s\"",
192  (const char *) encoding_name)));
193  return encoding;
194 }
195 #endif
196 
197 
198 /*
199  * xml_in uses a plain C string to VARDATA conversion, so for the time being
200  * we use the conversion function for the text datatype.
201  *
202  * This is only acceptable so long as xmltype and text use the same
203  * representation.
204  */
205 Datum
207 {
208 #ifdef USE_LIBXML
209  char *s = PG_GETARG_CSTRING(0);
210  xmltype *vardata;
211  xmlDocPtr doc;
212 
213  vardata = (xmltype *) cstring_to_text(s);
214 
215  /*
216  * Parse the data to check if it is well-formed XML data. Assume that
217  * ERROR occurred if parsing failed.
218  */
219  doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
220  xmlFreeDoc(doc);
221 
222  PG_RETURN_XML_P(vardata);
223 #else
224  NO_XML_SUPPORT();
225  return 0;
226 #endif
227 }
228 
229 
230 #define PG_XML_DEFAULT_VERSION "1.0"
231 
232 
233 /*
234  * xml_out_internal uses a plain VARDATA to C string conversion, so for the
235  * time being we use the conversion function for the text datatype.
236  *
237  * This is only acceptable so long as xmltype and text use the same
238  * representation.
239  */
240 static char *
241 xml_out_internal(xmltype *x, pg_enc target_encoding)
242 {
243  char *str = text_to_cstring((text *) x);
244 
245 #ifdef USE_LIBXML
246  size_t len = strlen(str);
247  xmlChar *version;
248  int standalone;
249  int res_code;
250 
251  if ((res_code = parse_xml_decl((xmlChar *) str,
252  &len, &version, NULL, &standalone)) == 0)
253  {
255 
256  initStringInfo(&buf);
257 
258  if (!print_xml_decl(&buf, version, target_encoding, standalone))
259  {
260  /*
261  * If we are not going to produce an XML declaration, eat a single
262  * newline in the original string to prevent empty first lines in
263  * the output.
264  */
265  if (*(str + len) == '\n')
266  len += 1;
267  }
268  appendStringInfoString(&buf, str + len);
269 
270  pfree(str);
271 
272  return buf.data;
273  }
274 
275  xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
276  "could not parse XML declaration in stored value",
277  res_code);
278 #endif
279  return str;
280 }
281 
282 
283 Datum
285 {
286  xmltype *x = PG_GETARG_XML_P(0);
287 
288  /*
289  * xml_out removes the encoding property in all cases. This is because we
290  * cannot control from here whether the datum will be converted to a
291  * different client encoding, so we'd do more harm than good by including
292  * it.
293  */
295 }
296 
297 
298 Datum
300 {
301 #ifdef USE_LIBXML
303  xmltype *result;
304  char *str;
305  char *newstr;
306  int nbytes;
307  xmlDocPtr doc;
308  xmlChar *encodingStr = NULL;
309  int encoding;
310 
311  /*
312  * Read the data in raw format. We don't know yet what the encoding is, as
313  * that information is embedded in the xml declaration; so we have to
314  * parse that before converting to server encoding.
315  */
316  nbytes = buf->len - buf->cursor;
317  str = (char *) pq_getmsgbytes(buf, nbytes);
318 
319  /*
320  * We need a null-terminated string to pass to parse_xml_decl(). Rather
321  * than make a separate copy, make the temporary result one byte bigger
322  * than it needs to be.
323  */
324  result = palloc(nbytes + 1 + VARHDRSZ);
325  SET_VARSIZE(result, nbytes + VARHDRSZ);
326  memcpy(VARDATA(result), str, nbytes);
327  str = VARDATA(result);
328  str[nbytes] = '\0';
329 
330  parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
331 
332  /*
333  * If encoding wasn't explicitly specified in the XML header, treat it as
334  * UTF-8, as that's the default in XML. This is different from xml_in(),
335  * where the input has to go through the normal client to server encoding
336  * conversion.
337  */
338  encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
339 
340  /*
341  * Parse the data to check if it is well-formed XML data. Assume that
342  * xml_parse will throw ERROR if not.
343  */
344  doc = xml_parse(result, xmloption, true, encoding);
345  xmlFreeDoc(doc);
346 
347  /* Now that we know what we're dealing with, convert to server encoding */
348  newstr = pg_any_to_server(str, nbytes, encoding);
349 
350  if (newstr != str)
351  {
352  pfree(result);
353  result = (xmltype *) cstring_to_text(newstr);
354  pfree(newstr);
355  }
356 
357  PG_RETURN_XML_P(result);
358 #else
359  NO_XML_SUPPORT();
360  return 0;
361 #endif
362 }
363 
364 
365 Datum
367 {
368  xmltype *x = PG_GETARG_XML_P(0);
369  char *outval;
371 
372  /*
373  * xml_out_internal doesn't convert the encoding, it just prints the right
374  * declaration. pq_sendtext will do the conversion.
375  */
377 
378  pq_begintypsend(&buf);
379  pq_sendtext(&buf, outval, strlen(outval));
380  pfree(outval);
382 }
383 
384 
385 #ifdef USE_LIBXML
386 static void
387 appendStringInfoText(StringInfo str, const text *t)
388 {
390 }
391 #endif
392 
393 
394 static xmltype *
396 {
397  return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
398 }
399 
400 
401 static xmltype *
402 cstring_to_xmltype(const char *string)
403 {
404  return (xmltype *) cstring_to_text(string);
405 }
406 
407 
408 #ifdef USE_LIBXML
409 static xmltype *
410 xmlBuffer_to_xmltype(xmlBufferPtr buf)
411 {
412  return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
413  xmlBufferLength(buf));
414 }
415 #endif
416 
417 
418 Datum
420 {
421 #ifdef USE_LIBXML
422  text *arg = PG_GETARG_TEXT_P(0);
423  char *argdata = VARDATA(arg);
424  int len = VARSIZE(arg) - VARHDRSZ;
426  int i;
427 
428  /* check for "--" in string or "-" at the end */
429  for (i = 1; i < len; i++)
430  {
431  if (argdata[i] == '-' && argdata[i - 1] == '-')
432  ereport(ERROR,
433  (errcode(ERRCODE_INVALID_XML_COMMENT),
434  errmsg("invalid XML comment")));
435  }
436  if (len > 0 && argdata[len - 1] == '-')
437  ereport(ERROR,
438  (errcode(ERRCODE_INVALID_XML_COMMENT),
439  errmsg("invalid XML comment")));
440 
441  initStringInfo(&buf);
442  appendStringInfoString(&buf, "<!--");
443  appendStringInfoText(&buf, arg);
444  appendStringInfoString(&buf, "-->");
445 
447 #else
448  NO_XML_SUPPORT();
449  return 0;
450 #endif
451 }
452 
453 
454 
455 /*
456  * TODO: xmlconcat needs to merge the notations and unparsed entities
457  * of the argument values. Not very important in practice, though.
458  */
459 xmltype *
461 {
462 #ifdef USE_LIBXML
463  int global_standalone = 1;
464  xmlChar *global_version = NULL;
465  bool global_version_no_value = false;
467  ListCell *v;
468 
469  initStringInfo(&buf);
470  foreach(v, args)
471  {
473  size_t len;
474  xmlChar *version;
475  int standalone;
476  char *str;
477 
478  len = VARSIZE(x) - VARHDRSZ;
479  str = text_to_cstring((text *) x);
480 
481  parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
482 
483  if (standalone == 0 && global_standalone == 1)
484  global_standalone = 0;
485  if (standalone < 0)
486  global_standalone = -1;
487 
488  if (!version)
489  global_version_no_value = true;
490  else if (!global_version)
491  global_version = version;
492  else if (xmlStrcmp(version, global_version) != 0)
493  global_version_no_value = true;
494 
495  appendStringInfoString(&buf, str + len);
496  pfree(str);
497  }
498 
499  if (!global_version_no_value || global_standalone >= 0)
500  {
501  StringInfoData buf2;
502 
503  initStringInfo(&buf2);
504 
505  print_xml_decl(&buf2,
506  (!global_version_no_value) ? global_version : NULL,
507  0,
508  global_standalone);
509 
510  appendStringInfoString(&buf2, buf.data);
511  buf = buf2;
512  }
513 
514  return stringinfo_to_xmltype(&buf);
515 #else
516  NO_XML_SUPPORT();
517  return NULL;
518 #endif
519 }
520 
521 
522 /*
523  * XMLAGG support
524  */
525 Datum
527 {
528  if (PG_ARGISNULL(0))
529  {
530  if (PG_ARGISNULL(1))
531  PG_RETURN_NULL();
532  else
534  }
535  else if (PG_ARGISNULL(1))
537  else
539  PG_GETARG_XML_P(1))));
540 }
541 
542 
543 Datum
545 {
546  text *data = PG_GETARG_TEXT_P(0);
547 
548  PG_RETURN_XML_P(xmlparse(data, xmloption, true));
549 }
550 
551 
552 Datum
554 {
555  xmltype *data = PG_GETARG_XML_P(0);
556 
557  /* It's actually binary compatible. */
558  PG_RETURN_TEXT_P((text *) data);
559 }
560 
561 
562 text *
564 {
565  if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
566  ereport(ERROR,
567  (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
568  errmsg("not an XML document")));
569 
570  /* It's actually binary compatible, save for the above check. */
571  return (text *) data;
572 }
573 
574 
575 xmltype *
576 xmlelement(XmlExprState *xmlExpr, ExprContext *econtext)
577 {
578 #ifdef USE_LIBXML
579  XmlExpr *xexpr = (XmlExpr *) xmlExpr->xprstate.expr;
580  xmltype *result;
581  List *named_arg_strings;
582  List *arg_strings;
583  int i;
584  ListCell *arg;
585  ListCell *narg;
586  PgXmlErrorContext *xmlerrcxt;
587  volatile xmlBufferPtr buf = NULL;
588  volatile xmlTextWriterPtr writer = NULL;
589 
590  /*
591  * We first evaluate all the arguments, then start up libxml and create
592  * the result. This avoids issues if one of the arguments involves a call
593  * to some other function or subsystem that wants to use libxml on its own
594  * terms.
595  */
596  named_arg_strings = NIL;
597  i = 0;
598  foreach(arg, xmlExpr->named_args)
599  {
600  ExprState *e = (ExprState *) lfirst(arg);
601  Datum value;
602  bool isnull;
603  char *str;
604 
605  value = ExecEvalExpr(e, econtext, &isnull, NULL);
606  if (isnull)
607  str = NULL;
608  else
609  str = map_sql_value_to_xml_value(value, exprType((Node *) e->expr), false);
610  named_arg_strings = lappend(named_arg_strings, str);
611  i++;
612  }
613 
614  arg_strings = NIL;
615  foreach(arg, xmlExpr->args)
616  {
617  ExprState *e = (ExprState *) lfirst(arg);
618  Datum value;
619  bool isnull;
620  char *str;
621 
622  value = ExecEvalExpr(e, econtext, &isnull, NULL);
623  /* here we can just forget NULL elements immediately */
624  if (!isnull)
625  {
626  str = map_sql_value_to_xml_value(value,
627  exprType((Node *) e->expr), true);
628  arg_strings = lappend(arg_strings, str);
629  }
630  }
631 
632  /* now safe to run libxml */
633  xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
634 
635  PG_TRY();
636  {
637  buf = xmlBufferCreate();
638  if (buf == NULL || xmlerrcxt->err_occurred)
639  xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
640  "could not allocate xmlBuffer");
641  writer = xmlNewTextWriterMemory(buf, 0);
642  if (writer == NULL || xmlerrcxt->err_occurred)
643  xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
644  "could not allocate xmlTextWriter");
645 
646  xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
647 
648  forboth(arg, named_arg_strings, narg, xexpr->arg_names)
649  {
650  char *str = (char *) lfirst(arg);
651  char *argname = strVal(lfirst(narg));
652 
653  if (str)
654  xmlTextWriterWriteAttribute(writer,
655  (xmlChar *) argname,
656  (xmlChar *) str);
657  }
658 
659  foreach(arg, arg_strings)
660  {
661  char *str = (char *) lfirst(arg);
662 
663  xmlTextWriterWriteRaw(writer, (xmlChar *) str);
664  }
665 
666  xmlTextWriterEndElement(writer);
667 
668  /* we MUST do this now to flush data out to the buffer ... */
669  xmlFreeTextWriter(writer);
670  writer = NULL;
671 
672  result = xmlBuffer_to_xmltype(buf);
673  }
674  PG_CATCH();
675  {
676  if (writer)
677  xmlFreeTextWriter(writer);
678  if (buf)
679  xmlBufferFree(buf);
680 
681  pg_xml_done(xmlerrcxt, true);
682 
683  PG_RE_THROW();
684  }
685  PG_END_TRY();
686 
687  xmlBufferFree(buf);
688 
689  pg_xml_done(xmlerrcxt, false);
690 
691  return result;
692 #else
693  NO_XML_SUPPORT();
694  return NULL;
695 #endif
696 }
697 
698 
699 xmltype *
700 xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
701 {
702 #ifdef USE_LIBXML
703  xmlDocPtr doc;
704 
705  doc = xml_parse(data, xmloption_arg, preserve_whitespace,
707  xmlFreeDoc(doc);
708 
709  return (xmltype *) data;
710 #else
711  NO_XML_SUPPORT();
712  return NULL;
713 #endif
714 }
715 
716 
717 xmltype *
718 xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null)
719 {
720 #ifdef USE_LIBXML
721  xmltype *result;
723 
724  if (pg_strcasecmp(target, "xml") == 0)
725  ereport(ERROR,
726  (errcode(ERRCODE_SYNTAX_ERROR), /* really */
727  errmsg("invalid XML processing instruction"),
728  errdetail("XML processing instruction target name cannot be \"%s\".", target)));
729 
730  /*
731  * Following the SQL standard, the null check comes after the syntax check
732  * above.
733  */
734  *result_is_null = arg_is_null;
735  if (*result_is_null)
736  return NULL;
737 
738  initStringInfo(&buf);
739 
740  appendStringInfo(&buf, "<?%s", target);
741 
742  if (arg != NULL)
743  {
744  char *string;
745 
746  string = text_to_cstring(arg);
747  if (strstr(string, "?>") != NULL)
748  ereport(ERROR,
749  (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
750  errmsg("invalid XML processing instruction"),
751  errdetail("XML processing instruction cannot contain \"?>\".")));
752 
753  appendStringInfoChar(&buf, ' ');
754  appendStringInfoString(&buf, string + strspn(string, " "));
755  pfree(string);
756  }
757  appendStringInfoString(&buf, "?>");
758 
759  result = stringinfo_to_xmltype(&buf);
760  pfree(buf.data);
761  return result;
762 #else
763  NO_XML_SUPPORT();
764  return NULL;
765 #endif
766 }
767 
768 
769 xmltype *
770 xmlroot(xmltype *data, text *version, int standalone)
771 {
772 #ifdef USE_LIBXML
773  char *str;
774  size_t len;
775  xmlChar *orig_version;
776  int orig_standalone;
778 
779  len = VARSIZE(data) - VARHDRSZ;
780  str = text_to_cstring((text *) data);
781 
782  parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
783 
784  if (version)
785  orig_version = xml_text2xmlChar(version);
786  else
787  orig_version = NULL;
788 
789  switch (standalone)
790  {
791  case XML_STANDALONE_YES:
792  orig_standalone = 1;
793  break;
794  case XML_STANDALONE_NO:
795  orig_standalone = 0;
796  break;
798  orig_standalone = -1;
799  break;
801  /* leave original value */
802  break;
803  }
804 
805  initStringInfo(&buf);
806  print_xml_decl(&buf, orig_version, 0, orig_standalone);
807  appendStringInfoString(&buf, str + len);
808 
809  return stringinfo_to_xmltype(&buf);
810 #else
811  NO_XML_SUPPORT();
812  return NULL;
813 #endif
814 }
815 
816 
817 /*
818  * Validate document (given as string) against DTD (given as external link)
819  *
820  * This has been removed because it is a security hole: unprivileged users
821  * should not be able to use Postgres to fetch arbitrary external files,
822  * which unfortunately is exactly what libxml is willing to do with the DTD
823  * parameter.
824  */
825 Datum
827 {
828  ereport(ERROR,
829  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
830  errmsg("xmlvalidate is not implemented")));
831  return 0;
832 }
833 
834 
835 bool
837 {
838 #ifdef USE_LIBXML
839  bool result;
840  volatile xmlDocPtr doc = NULL;
842 
843  /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
844  PG_TRY();
845  {
846  doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
848  result = true;
849  }
850  PG_CATCH();
851  {
852  ErrorData *errdata;
853  MemoryContext ecxt;
854 
855  ecxt = MemoryContextSwitchTo(ccxt);
856  errdata = CopyErrorData();
857  if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
858  {
859  FlushErrorState();
860  result = false;
861  }
862  else
863  {
864  MemoryContextSwitchTo(ecxt);
865  PG_RE_THROW();
866  }
867  }
868  PG_END_TRY();
869 
870  if (doc)
871  xmlFreeDoc(doc);
872 
873  return result;
874 #else /* not USE_LIBXML */
875  NO_XML_SUPPORT();
876  return false;
877 #endif /* not USE_LIBXML */
878 }
879 
880 
881 #ifdef USE_LIBXML
882 
883 /*
884  * pg_xml_init_library --- set up for use of libxml
885  *
886  * This should be called by each function that is about to use libxml
887  * facilities but doesn't require error handling. It initializes libxml
888  * and verifies compatibility with the loaded libxml version. These are
889  * once-per-session activities.
890  *
891  * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
892  * check)
893  */
894 void
896 {
897  static bool first_time = true;
898 
899  if (first_time)
900  {
901  /* Stuff we need do only once per session */
902 
903  /*
904  * Currently, we have no pure UTF-8 support for internals -- check if
905  * we can work.
906  */
907  if (sizeof(char) != sizeof(xmlChar))
908  ereport(ERROR,
909  (errmsg("could not initialize XML library"),
910  errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
911  (int) sizeof(char), (int) sizeof(xmlChar))));
912 
913 #ifdef USE_LIBXMLCONTEXT
914  /* Set up libxml's memory allocation our way */
915  xml_memory_init();
916 #endif
917 
918  /* Check library compatibility */
919  LIBXML_TEST_VERSION;
920 
921  first_time = false;
922  }
923 }
924 
925 /*
926  * pg_xml_init --- set up for use of libxml and register an error handler
927  *
928  * This should be called by each function that is about to use libxml
929  * facilities and requires error handling. It initializes libxml with
930  * pg_xml_init_library() and establishes our libxml error handler.
931  *
932  * strictness determines which errors are reported and which are ignored.
933  *
934  * Calls to this function MUST be followed by a PG_TRY block that guarantees
935  * that pg_xml_done() is called during either normal or error exit.
936  *
937  * This is exported for use by contrib/xml2, as well as other code that might
938  * wish to share use of this module's libxml error handler.
939  */
941 pg_xml_init(PgXmlStrictness strictness)
942 {
943  PgXmlErrorContext *errcxt;
944  void *new_errcxt;
945 
946  /* Do one-time setup if needed */
948 
949  /* Create error handling context structure */
950  errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
951  errcxt->magic = ERRCXT_MAGIC;
952  errcxt->strictness = strictness;
953  errcxt->err_occurred = false;
954  initStringInfo(&errcxt->err_buf);
955 
956  /*
957  * Save original error handler and install ours. libxml originally didn't
958  * distinguish between the contexts for generic and for structured error
959  * handlers. If we're using an old libxml version, we must thus save the
960  * generic error context, even though we're using a structured error
961  * handler.
962  */
963  errcxt->saved_errfunc = xmlStructuredError;
964 
965 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
966  errcxt->saved_errcxt = xmlStructuredErrorContext;
967 #else
968  errcxt->saved_errcxt = xmlGenericErrorContext;
969 #endif
970 
971  xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
972 
973  /*
974  * Verify that xmlSetStructuredErrorFunc set the context variable we
975  * expected it to. If not, the error context pointer we just saved is not
976  * the correct thing to restore, and since that leaves us without a way to
977  * restore the context in pg_xml_done, we must fail.
978  *
979  * The only known situation in which this test fails is if we compile with
980  * headers from a libxml2 that doesn't track the structured error context
981  * separately (< 2.7.4), but at runtime use a version that does, or vice
982  * versa. The libxml2 authors did not treat that change as constituting
983  * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
984  * fails to protect us from this.
985  */
986 
987 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
988  new_errcxt = xmlStructuredErrorContext;
989 #else
990  new_errcxt = xmlGenericErrorContext;
991 #endif
992 
993  if (new_errcxt != (void *) errcxt)
994  ereport(ERROR,
995  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
996  errmsg("could not set up XML error handler"),
997  errhint("This probably indicates that the version of libxml2"
998  " being used is not compatible with the libxml2"
999  " header files that PostgreSQL was built with.")));
1000 
1001  /*
1002  * Also, install an entity loader to prevent unwanted fetches of external
1003  * files and URLs.
1004  */
1005  errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1006  xmlSetExternalEntityLoader(xmlPgEntityLoader);
1007 
1008  return errcxt;
1009 }
1010 
1011 
1012 /*
1013  * pg_xml_done --- restore previous libxml error handling
1014  *
1015  * Resets libxml's global error-handling state to what it was before
1016  * pg_xml_init() was called.
1017  *
1018  * This routine verifies that all pending errors have been dealt with
1019  * (in assert-enabled builds, anyway).
1020  */
1021 void
1022 pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1023 {
1024  void *cur_errcxt;
1025 
1026  /* An assert seems like enough protection here */
1027  Assert(errcxt->magic == ERRCXT_MAGIC);
1028 
1029  /*
1030  * In a normal exit, there should be no un-handled libxml errors. But we
1031  * shouldn't try to enforce this during error recovery, since the longjmp
1032  * could have been thrown before xml_ereport had a chance to run.
1033  */
1034  Assert(!errcxt->err_occurred || isError);
1035 
1036  /*
1037  * Check that libxml's global state is correct, warn if not. This is a
1038  * real test and not an Assert because it has a higher probability of
1039  * happening.
1040  */
1041 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1042  cur_errcxt = xmlStructuredErrorContext;
1043 #else
1044  cur_errcxt = xmlGenericErrorContext;
1045 #endif
1046 
1047  if (cur_errcxt != (void *) errcxt)
1048  elog(WARNING, "libxml error handling state is out of sync with xml.c");
1049 
1050  /* Restore the saved handlers */
1051  xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1052  xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1053 
1054  /*
1055  * Mark the struct as invalid, just in case somebody somehow manages to
1056  * call xml_errorHandler or xml_ereport with it.
1057  */
1058  errcxt->magic = 0;
1059 
1060  /* Release memory */
1061  pfree(errcxt->err_buf.data);
1062  pfree(errcxt);
1063 }
1064 
1065 
1066 /*
1067  * pg_xml_error_occurred() --- test the error flag
1068  */
1069 bool
1071 {
1072  return errcxt->err_occurred;
1073 }
1074 
1075 
1076 /*
1077  * SQL/XML allows storing "XML documents" or "XML content". "XML
1078  * documents" are specified by the XML specification and are parsed
1079  * easily by libxml. "XML content" is specified by SQL/XML as the
1080  * production "XMLDecl? content". But libxml can only parse the
1081  * "content" part, so we have to parse the XML declaration ourselves
1082  * to complete this.
1083  */
1084 
/*
 * Return XML_ERR_SPACE_REQUIRED from the enclosing function unless *p is
 * XML whitespace.
 */
#define CHECK_XML_SPACE(p) \
    do { \
        if (!xmlIsBlank_ch(*(p))) \
            return XML_ERR_SPACE_REQUIRED; \
    } while (0)

/* Advance p past any run of XML whitespace. */
#define SKIP_XML_SPACE(p) \
    while (xmlIsBlank_ch(*(p))) (p)++

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/* Beware of multiple evaluations of argument! */
#define PG_XMLISNAMECHAR(c) \
    (xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
     || xmlIsDigit_ch(c) \
     || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
     || xmlIsCombiningQ(c) \
     || xmlIsExtender_ch(c))
1102 
1103 /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1104 static xmlChar *
1105 xml_pnstrdup(const xmlChar *str, size_t len)
1106 {
1107  xmlChar *result;
1108 
1109  result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1110  memcpy(result, str, len * sizeof(xmlChar));
1111  result[len] = 0;
1112  return result;
1113 }
1114 
1115 /*
1116  * str is the null-terminated input string. Remaining arguments are
1117  * output arguments; each can be NULL if value is not wanted.
1118  * version and encoding are returned as locally-palloc'd strings.
1119  * Result is 0 if OK, an error code if not.
1120  */
1121 static int
1122 parse_xml_decl(const xmlChar *str, size_t *lenp,
1123  xmlChar **version, xmlChar **encoding, int *standalone)
1124 {
1125  const xmlChar *p;
1126  const xmlChar *save_p;
1127  size_t len;
1128  int utf8char;
1129  int utf8len;
1130 
1131  /*
1132  * Only initialize libxml. We don't need error handling here, but we do
1133  * need to make sure libxml is initialized before calling any of its
1134  * functions. Note that this is safe (and a no-op) if caller has already
1135  * done pg_xml_init().
1136  */
1138 
1139  /* Initialize output arguments to "not present" */
1140  if (version)
1141  *version = NULL;
1142  if (encoding)
1143  *encoding = NULL;
1144  if (standalone)
1145  *standalone = -1;
1146 
1147  p = str;
1148 
1149  if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1150  goto finished;
1151 
1152  /* if next char is name char, it's a PI like <?xml-stylesheet ...?> */
1153  utf8len = strlen((const char *) (p + 5));
1154  utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1155  if (PG_XMLISNAMECHAR(utf8char))
1156  goto finished;
1157 
1158  p += 5;
1159 
1160  /* version */
1161  CHECK_XML_SPACE(p);
1162  SKIP_XML_SPACE(p);
1163  if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1164  return XML_ERR_VERSION_MISSING;
1165  p += 7;
1166  SKIP_XML_SPACE(p);
1167  if (*p != '=')
1168  return XML_ERR_VERSION_MISSING;
1169  p += 1;
1170  SKIP_XML_SPACE(p);
1171 
1172  if (*p == '\'' || *p == '"')
1173  {
1174  const xmlChar *q;
1175 
1176  q = xmlStrchr(p + 1, *p);
1177  if (!q)
1178  return XML_ERR_VERSION_MISSING;
1179 
1180  if (version)
1181  *version = xml_pnstrdup(p + 1, q - p - 1);
1182  p = q + 1;
1183  }
1184  else
1185  return XML_ERR_VERSION_MISSING;
1186 
1187  /* encoding */
1188  save_p = p;
1189  SKIP_XML_SPACE(p);
1190  if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1191  {
1192  CHECK_XML_SPACE(save_p);
1193  p += 8;
1194  SKIP_XML_SPACE(p);
1195  if (*p != '=')
1196  return XML_ERR_MISSING_ENCODING;
1197  p += 1;
1198  SKIP_XML_SPACE(p);
1199 
1200  if (*p == '\'' || *p == '"')
1201  {
1202  const xmlChar *q;
1203 
1204  q = xmlStrchr(p + 1, *p);
1205  if (!q)
1206  return XML_ERR_MISSING_ENCODING;
1207 
1208  if (encoding)
1209  *encoding = xml_pnstrdup(p + 1, q - p - 1);
1210  p = q + 1;
1211  }
1212  else
1213  return XML_ERR_MISSING_ENCODING;
1214  }
1215  else
1216  {
1217  p = save_p;
1218  }
1219 
1220  /* standalone */
1221  save_p = p;
1222  SKIP_XML_SPACE(p);
1223  if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1224  {
1225  CHECK_XML_SPACE(save_p);
1226  p += 10;
1227  SKIP_XML_SPACE(p);
1228  if (*p != '=')
1229  return XML_ERR_STANDALONE_VALUE;
1230  p += 1;
1231  SKIP_XML_SPACE(p);
1232  if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1233  xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1234  {
1235  if (standalone)
1236  *standalone = 1;
1237  p += 5;
1238  }
1239  else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1240  xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1241  {
1242  if (standalone)
1243  *standalone = 0;
1244  p += 4;
1245  }
1246  else
1247  return XML_ERR_STANDALONE_VALUE;
1248  }
1249  else
1250  {
1251  p = save_p;
1252  }
1253 
1254  SKIP_XML_SPACE(p);
1255  if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1256  return XML_ERR_XMLDECL_NOT_FINISHED;
1257  p += 2;
1258 
1259 finished:
1260  len = p - str;
1261 
1262  for (p = str; p < str + len; p++)
1263  if (*p > 127)
1264  return XML_ERR_INVALID_CHAR;
1265 
1266  if (lenp)
1267  *lenp = len;
1268 
1269  return XML_ERR_OK;
1270 }
1271 
1272 
1273 /*
1274  * Write an XML declaration. On output, we adjust the XML declaration
1275  * as follows. (These rules are the moral equivalent of the clause
1276  * "Serialization of an XML value" in the SQL standard.)
1277  *
1278  * We try to avoid generating an XML declaration if possible. This is
1279  * so that you don't get trivial things like xml '<foo/>' resulting in
1280  * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1281  * must provide a declaration if the standalone property is specified
1282  * or if we include an encoding declaration. If we have a
1283  * declaration, we must specify a version (XML requires this).
1284  * Otherwise we only make a declaration if the version is not "1.0",
1285  * which is the default version specified in SQL:2003.
1286  */
1287 static bool
1288 print_xml_decl(StringInfo buf, const xmlChar *version,
1289  pg_enc encoding, int standalone)
1290 {
1291  if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1292  || (encoding && encoding != PG_UTF8)
1293  || standalone != -1)
1294  {
1295  appendStringInfoString(buf, "<?xml");
1296 
1297  if (version)
1298  appendStringInfo(buf, " version=\"%s\"", version);
1299  else
1300  appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1301 
1302  if (encoding && encoding != PG_UTF8)
1303  {
1304  /*
1305  * XXX might be useful to convert this to IANA names (ISO-8859-1
1306  * instead of LATIN1 etc.); needs field experience
1307  */
1308  appendStringInfo(buf, " encoding=\"%s\"",
1309  pg_encoding_to_char(encoding));
1310  }
1311 
1312  if (standalone == 1)
1313  appendStringInfoString(buf, " standalone=\"yes\"");
1314  else if (standalone == 0)
1315  appendStringInfoString(buf, " standalone=\"no\"");
1316  appendStringInfoString(buf, "?>");
1317 
1318  return true;
1319  }
1320  else
1321  return false;
1322 }
1323 
1324 
1325 /*
1326  * Convert a C string to XML internal representation
1327  *
1328  * Note: it is caller's responsibility to xmlFreeDoc() the result,
1329  * else a permanent memory leak will ensue!
1330  *
1331  * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1332  * yet do not use SAX - see xmlreader.c)
1333  */
1334 static xmlDocPtr
1335 xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1336  int encoding)
1337 {
1338  int32 len;
1339  xmlChar *string;
1340  xmlChar *utf8string;
1341  PgXmlErrorContext *xmlerrcxt;
1342  volatile xmlParserCtxtPtr ctxt = NULL;
1343  volatile xmlDocPtr doc = NULL;
1344 
1345  len = VARSIZE(data) - VARHDRSZ; /* will be useful later */
1346  string = xml_text2xmlChar(data);
1347 
1348  utf8string = pg_do_encoding_conversion(string,
1349  len,
1350  encoding,
1351  PG_UTF8);
1352 
1353  /* Start up libxml and its parser */
1355 
1356  /* Use a TRY block to ensure we clean up correctly */
1357  PG_TRY();
1358  {
1359  xmlInitParser();
1360 
1361  ctxt = xmlNewParserCtxt();
1362  if (ctxt == NULL || xmlerrcxt->err_occurred)
1363  xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1364  "could not allocate parser context");
1365 
1366  if (xmloption_arg == XMLOPTION_DOCUMENT)
1367  {
1368  /*
1369  * Note, that here we try to apply DTD defaults
1370  * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1371  * 'Default values defined by internal DTD are applied'. As for
1372  * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1373  * 10.16.7.e)
1374  */
1375  doc = xmlCtxtReadDoc(ctxt, utf8string,
1376  NULL,
1377  "UTF-8",
1378  XML_PARSE_NOENT | XML_PARSE_DTDATTR
1379  | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1380  if (doc == NULL || xmlerrcxt->err_occurred)
1381  xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1382  "invalid XML document");
1383  }
1384  else
1385  {
1386  int res_code;
1387  size_t count;
1388  xmlChar *version;
1389  int standalone;
1390 
1391  res_code = parse_xml_decl(utf8string,
1392  &count, &version, NULL, &standalone);
1393  if (res_code != 0)
1394  xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1395  "invalid XML content: invalid XML declaration",
1396  res_code);
1397 
1398  doc = xmlNewDoc(version);
1399  Assert(doc->encoding == NULL);
1400  doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1401  doc->standalone = standalone;
1402 
1403  /* allow empty content */
1404  if (*(utf8string + count))
1405  {
1406  res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1407  utf8string + count, NULL);
1408  if (res_code != 0 || xmlerrcxt->err_occurred)
1409  xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1410  "invalid XML content");
1411  }
1412  }
1413  }
1414  PG_CATCH();
1415  {
1416  if (doc != NULL)
1417  xmlFreeDoc(doc);
1418  if (ctxt != NULL)
1419  xmlFreeParserCtxt(ctxt);
1420 
1421  pg_xml_done(xmlerrcxt, true);
1422 
1423  PG_RE_THROW();
1424  }
1425  PG_END_TRY();
1426 
1427  xmlFreeParserCtxt(ctxt);
1428 
1429  pg_xml_done(xmlerrcxt, false);
1430 
1431  return doc;
1432 }
1433 
1434 
1435 /*
1436  * xmlChar<->text conversions
1437  */
1438 static xmlChar *
1439 xml_text2xmlChar(text *in)
1440 {
1441  return (xmlChar *) text_to_cstring(in);
1442 }
1443 
1444 
1445 #ifdef USE_LIBXMLCONTEXT
1446 
1447 /*
1448  * Manage the special context used for all libxml allocations (but only
1449  * in special debug builds; see notes at top of file)
1450  */
1451 static void
1452 xml_memory_init(void)
1453 {
1454  /* Create memory context if not there already */
1455  if (LibxmlContext == NULL)
1456  LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1457  "LibxmlContext",
1461 
1462  /* Re-establish the callbacks even if already set */
1463  xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1464 }
1465 
1466 /*
1467  * Wrappers for memory management functions
1468  */
1469 static void *
1470 xml_palloc(size_t size)
1471 {
1472  return MemoryContextAlloc(LibxmlContext, size);
1473 }
1474 
1475 
/* Resize a chunk previously handed to libxml; thin wrapper over repalloc. */
static void *
xml_repalloc(void *ptr, size_t size)
{
	void	   *resized;

	resized = repalloc(ptr, size);
	return resized;
}
1481 
1482 
/* Release a chunk on libxml's behalf; a NULL pointer is silently ignored. */
static void
xml_pfree(void *ptr)
{
	/* At least some parts of libxml assume xmlFree(NULL) is allowed */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
1490 
1491 
1492 static char *
1493 xml_pstrdup(const char *string)
1494 {
1495  return MemoryContextStrdup(LibxmlContext, string);
1496 }
1497 #endif /* USE_LIBXMLCONTEXT */
1498 
1499 
1500 /*
1501  * xmlPgEntityLoader --- entity loader callback function
1502  *
1503  * Silently prevent any external entity URL from being loaded. We don't want
1504  * to throw an error, so instead make the entity appear to expand to an empty
1505  * string.
1506  *
1507  * We would prefer to allow loading entities that exist in the system's
1508  * global XML catalog; but the available libxml2 APIs make that a complex
1509  * and fragile task. For now, just shut down all external access.
1510  */
1511 static xmlParserInputPtr
1512 xmlPgEntityLoader(const char *URL, const char *ID,
1513  xmlParserCtxtPtr ctxt)
1514 {
1515  return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
1516 }
1517 
1518 
1519 /*
1520  * xml_ereport --- report an XML-related error
1521  *
1522  * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1523  * standard. This function adds libxml's native error message, if any, as
1524  * detail.
1525  *
1526  * This is exported for modules that want to share the core libxml error
1527  * handler. Note that pg_xml_init() *must* have been called previously.
1528  */
1529 void
1530 xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1531 {
1532  char *detail;
1533 
1534  /* Defend against someone passing us a bogus context struct */
1535  if (errcxt->magic != ERRCXT_MAGIC)
1536  elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
1537 
1538  /* Flag that the current libxml error has been reported */
1539  errcxt->err_occurred = false;
1540 
1541  /* Include detail only if we have some text from libxml */
1542  if (errcxt->err_buf.len > 0)
1543  detail = errcxt->err_buf.data;
1544  else
1545  detail = NULL;
1546 
1547  ereport(level,
1548  (errcode(sqlcode),
1549  errmsg_internal("%s", msg),
1550  detail ? errdetail_internal("%s", detail) : 0));
1551 }
1552 
1553 
1554 /*
1555  * Error handler for libxml errors and warnings
1556  */
1557 static void
1558 xml_errorHandler(void *data, xmlErrorPtr error)
1559 {
1560  PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
1561  xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
1562  xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
1563  xmlNodePtr node = error->node;
1564  const xmlChar *name = (node != NULL &&
1565  node->type == XML_ELEMENT_NODE) ? node->name : NULL;
1566  int domain = error->domain;
1567  int level = error->level;
1568  StringInfo errorBuf;
1569 
1570  /*
1571  * Defend against someone passing us a bogus context struct.
1572  *
1573  * We force a backend exit if this check fails because longjmp'ing out of
1574  * libxml would likely render it unsafe to use further.
1575  */
1576  if (xmlerrcxt->magic != ERRCXT_MAGIC)
1577  elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
1578 
1579  /*----------
1580  * Older libxml versions report some errors differently.
1581  * First, some errors were previously reported as coming from the parser
1582  * domain but are now reported as coming from the namespace domain.
1583  * Second, some warnings were upgraded to errors.
1584  * We attempt to compensate for that here.
1585  *----------
1586  */
1587  switch (error->code)
1588  {
1589  case XML_WAR_NS_URI:
1590  level = XML_ERR_ERROR;
1591  domain = XML_FROM_NAMESPACE;
1592  break;
1593 
1594  case XML_ERR_NS_DECL_ERROR:
1595  case XML_WAR_NS_URI_RELATIVE:
1596  case XML_WAR_NS_COLUMN:
1597  case XML_NS_ERR_XML_NAMESPACE:
1598  case XML_NS_ERR_UNDEFINED_NAMESPACE:
1599  case XML_NS_ERR_QNAME:
1600  case XML_NS_ERR_ATTRIBUTE_REDEFINED:
1601  case XML_NS_ERR_EMPTY:
1602  domain = XML_FROM_NAMESPACE;
1603  break;
1604  }
1605 
1606  /* Decide whether to act on the error or not */
1607  switch (domain)
1608  {
1609  case XML_FROM_PARSER:
1610  case XML_FROM_NONE:
1611  case XML_FROM_MEMORY:
1612  case XML_FROM_IO:
1613 
1614  /*
1615  * Suppress warnings about undeclared entities. We need to do
1616  * this to avoid problems due to not loading DTD definitions.
1617  */
1618  if (error->code == XML_WAR_UNDECLARED_ENTITY)
1619  return;
1620 
1621  /* Otherwise, accept error regardless of the parsing purpose */
1622  break;
1623 
1624  default:
1625  /* Ignore error if only doing well-formedness check */
1626  if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
1627  return;
1628  break;
1629  }
1630 
1631  /* Prepare error message in errorBuf */
1632  errorBuf = makeStringInfo();
1633 
1634  if (error->line > 0)
1635  appendStringInfo(errorBuf, "line %d: ", error->line);
1636  if (name != NULL)
1637  appendStringInfo(errorBuf, "element %s: ", name);
1638  appendStringInfoString(errorBuf, error->message);
1639 
1640  /*
1641  * Append context information to errorBuf.
1642  *
1643  * xmlParserPrintFileContext() uses libxml's "generic" error handler to
1644  * write the context. Since we don't want to duplicate libxml
1645  * functionality here, we set up a generic error handler temporarily.
1646  *
1647  * We use appendStringInfo() directly as libxml's generic error handler.
1648  * This should work because it has essentially the same signature as
1649  * libxml expects, namely (void *ptr, const char *msg, ...).
1650  */
1651  if (input != NULL)
1652  {
1653  xmlGenericErrorFunc errFuncSaved = xmlGenericError;
1654  void *errCtxSaved = xmlGenericErrorContext;
1655 
1656  xmlSetGenericErrorFunc((void *) errorBuf,
1657  (xmlGenericErrorFunc) appendStringInfo);
1658 
1659  /* Add context information to errorBuf */
1660  appendStringInfoLineSeparator(errorBuf);
1661 
1662  xmlParserPrintFileContext(input);
1663 
1664  /* Restore generic error func */
1665  xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
1666  }
1667 
1668  /* Get rid of any trailing newlines in errorBuf */
1669  chopStringInfoNewlines(errorBuf);
1670 
1671  /*
1672  * Legacy error handling mode. err_occurred is never set, we just add the
1673  * message to err_buf. This mode exists because the xml2 contrib module
1674  * uses our error-handling infrastructure, but we don't want to change its
1675  * behaviour since it's deprecated anyway. This is also why we don't
1676  * distinguish between notices, warnings and errors here --- the old-style
1677  * generic error handler wouldn't have done that either.
1678  */
1679  if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
1680  {
1681  appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1682  appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1683 
1684  pfree(errorBuf->data);
1685  pfree(errorBuf);
1686  return;
1687  }
1688 
1689  /*
1690  * We don't want to ereport() here because that'd probably leave libxml in
1691  * an inconsistent state. Instead, we remember the error and ereport()
1692  * from xml_ereport().
1693  *
1694  * Warnings and notices can be reported immediately since they won't cause
1695  * a longjmp() out of libxml.
1696  */
1697  if (level >= XML_ERR_ERROR)
1698  {
1699  appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1700  appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1701 
1702  xmlerrcxt->err_occurred = true;
1703  }
1704  else if (level >= XML_ERR_WARNING)
1705  {
1706  ereport(WARNING,
1707  (errmsg_internal("%s", errorBuf->data)));
1708  }
1709  else
1710  {
1711  ereport(NOTICE,
1712  (errmsg_internal("%s", errorBuf->data)));
1713  }
1714 
1715  pfree(errorBuf->data);
1716  pfree(errorBuf);
1717 }
1718 
1719 
1720 /*
1721  * Wrapper for "ereport" function for XML-related errors. The "msg"
1722  * is the SQL-level message; some can be adopted from the SQL/XML
1723  * standard. This function uses "code" to create a textual detail
1724  * message. At the moment, we only need to cover those codes that we
1725  * may raise in this file.
1726  */
1727 static void
1728 xml_ereport_by_code(int level, int sqlcode,
1729  const char *msg, int code)
1730 {
1731  const char *det;
1732 
1733  switch (code)
1734  {
1735  case XML_ERR_INVALID_CHAR:
1736  det = gettext_noop("Invalid character value.");
1737  break;
1738  case XML_ERR_SPACE_REQUIRED:
1739  det = gettext_noop("Space required.");
1740  break;
1741  case XML_ERR_STANDALONE_VALUE:
1742  det = gettext_noop("standalone accepts only 'yes' or 'no'.");
1743  break;
1744  case XML_ERR_VERSION_MISSING:
1745  det = gettext_noop("Malformed declaration: missing version.");
1746  break;
1747  case XML_ERR_MISSING_ENCODING:
1748  det = gettext_noop("Missing encoding in text declaration.");
1749  break;
1750  case XML_ERR_XMLDECL_NOT_FINISHED:
1751  det = gettext_noop("Parsing XML declaration: '?>' expected.");
1752  break;
1753  default:
1754  det = gettext_noop("Unrecognized libxml error code: %d.");
1755  break;
1756  }
1757 
1758  ereport(level,
1759  (errcode(sqlcode),
1760  errmsg_internal("%s", msg),
1761  errdetail(det, code)));
1762 }
1763 
1764 
1765 /*
1766  * Remove all trailing newlines from a StringInfo string
1767  */
1768 static void
1769 chopStringInfoNewlines(StringInfo str)
1770 {
1771  while (str->len > 0 && str->data[str->len - 1] == '\n')
1772  str->data[--str->len] = '\0';
1773 }
1774 
1775 
1776 /*
1777  * Append a newline after removing any existing trailing newlines
1778  */
1779 static void
1780 appendStringInfoLineSeparator(StringInfo str)
1781 {
1782  chopStringInfoNewlines(str);
1783  if (str->len > 0)
1784  appendStringInfoChar(str, '\n');
1785 }
1786 
1787 
1788 /*
1789  * Convert one char in the current server encoding to a Unicode codepoint.
1790  */
1791 static pg_wchar
1792 sqlchar_to_unicode(char *s)
1793 {
1794  char *utf8string;
1795  pg_wchar ret[2]; /* need space for trailing zero */
1796 
1797  /* note we're not assuming s is null-terminated */
1798  utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
1799 
1800  pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
1801  pg_encoding_mblen(PG_UTF8, utf8string));
1802 
1803  if (utf8string != s)
1804  pfree(utf8string);
1805 
1806  return ret[0];
1807 }
1808 
1809 
1810 static bool
1811 is_valid_xml_namefirst(pg_wchar c)
1812 {
1813  /* (Letter | '_' | ':') */
1814  return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1815  || c == '_' || c == ':');
1816 }
1817 
1818 
1819 static bool
1820 is_valid_xml_namechar(pg_wchar c)
1821 {
1822  /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
1823  return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1824  || xmlIsDigitQ(c)
1825  || c == '.' || c == '-' || c == '_' || c == ':'
1826  || xmlIsCombiningQ(c)
1827  || xmlIsExtenderQ(c));
1828 }
1829 #endif /* USE_LIBXML */
1830 
1831 
/*
 * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
 *
 * Characters that are not valid at their position in an XML name are
 * written as _xHHHH_ escapes.  With fully_escaped, every ':' and a leading
 * "xml" (any case) are escaped as well; otherwise only a leading ':' is.
 * With escape_period, '.' is escaped too.  A literal "_x" has its
 * underscore escaped so the mapping stays unambiguous.
 *
 * Returns a palloc'd string (when built with libxml; otherwise
 * NO_XML_SUPPORT() is invoked).
 */
char *
map_sql_identifier_to_xml_name(char *ident, bool fully_escaped,
							   bool escape_period)
{
#ifdef USE_LIBXML
	StringInfoData buf;
	char	   *p;

	/*
	 * SQL/XML doesn't make use of this case anywhere, so it's probably a
	 * mistake.
	 */
	Assert(fully_escaped || !escape_period);

	initStringInfo(&buf);

	/* step through the identifier one (possibly multibyte) char at a time */
	for (p = ident; *p; p += pg_mblen(p))
	{
		if (*p == ':' && (p == ident || fully_escaped))
			appendStringInfoString(&buf, "_x003A_");
		else if (*p == '_' && *(p + 1) == 'x')
			appendStringInfoString(&buf, "_x005F_");
		else if (fully_escaped && p == ident &&
				 pg_strncasecmp(p, "xml", 3) == 0)
		{
			/* escape just the first letter, preserving its case */
			if (*p == 'x')
				appendStringInfoString(&buf, "_x0078_");
			else
				appendStringInfoString(&buf, "_x0058_");
		}
		else if (escape_period && *p == '.')
			appendStringInfoString(&buf, "_x002E_");
		else
		{
			pg_wchar	u = sqlchar_to_unicode(p);

			if ((p == ident)
				? !is_valid_xml_namefirst(u)
				: !is_valid_xml_namechar(u))
				appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
			else
				appendBinaryStringInfo(&buf, p, pg_mblen(p));
		}
	}

	return buf.data;
#else							/* not USE_LIBXML */
	NO_XML_SUPPORT();
	return NULL;
#endif   /* not USE_LIBXML */
}
1886 
1887 
1888 /*
1889  * Map a Unicode codepoint into the current server encoding.
1890  */
1891 static char *
1893 {
1894  char utf8string[8]; /* need room for trailing zero */
1895  char *result;
1896 
1897  memset(utf8string, 0, sizeof(utf8string));
1898  unicode_to_utf8(c, (unsigned char *) utf8string);
1899 
1900  result = pg_any_to_server(utf8string, strlen(utf8string), PG_UTF8);
1901  /* if pg_any_to_server didn't strdup, we must */
1902  if (result == utf8string)
1903  result = pstrdup(result);
1904  return result;
1905 }
1906 
1907 
1908 /*
1909  * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
1910  */
1911 char *
1913 {
1915  char *p;
1916 
1917  initStringInfo(&buf);
1918 
1919  for (p = name; *p; p += pg_mblen(p))
1920  {
1921  if (*p == '_' && *(p + 1) == 'x'
1922  && isxdigit((unsigned char) *(p + 2))
1923  && isxdigit((unsigned char) *(p + 3))
1924  && isxdigit((unsigned char) *(p + 4))
1925  && isxdigit((unsigned char) *(p + 5))
1926  && *(p + 6) == '_')
1927  {
1928  unsigned int u;
1929 
1930  sscanf(p + 2, "%X", &u);
1932  p += 6;
1933  }
1934  else
1935  appendBinaryStringInfo(&buf, p, pg_mblen(p));
1936  }
1937 
1938  return buf.data;
1939 }
1940 
1941 /*
1942  * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
1943  *
1944  * When xml_escape_strings is true, then certain characters in string
1945  * values are replaced by entity references (&lt; etc.), as specified
1946  * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
1947  * wanted. The false case is mainly useful when the resulting value
1948  * is used with xmlTextWriterWriteAttribute() to write out an
1949  * attribute, because that function does the escaping itself.
1950  */
1951 char *
1952 map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
1953 {
1954  if (type_is_array_domain(type))
1955  {
1956  ArrayType *array;
1957  Oid elmtype;
1958  int16 elmlen;
1959  bool elmbyval;
1960  char elmalign;
1961  int num_elems;
1962  Datum *elem_values;
1963  bool *elem_nulls;
1965  int i;
1966 
1967  array = DatumGetArrayTypeP(value);
1968  elmtype = ARR_ELEMTYPE(array);
1969  get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
1970 
1971  deconstruct_array(array, elmtype,
1972  elmlen, elmbyval, elmalign,
1973  &elem_values, &elem_nulls,
1974  &num_elems);
1975 
1976  initStringInfo(&buf);
1977 
1978  for (i = 0; i < num_elems; i++)
1979  {
1980  if (elem_nulls[i])
1981  continue;
1982  appendStringInfoString(&buf, "<element>");
1984  map_sql_value_to_xml_value(elem_values[i],
1985  elmtype, true));
1986  appendStringInfoString(&buf, "</element>");
1987  }
1988 
1989  pfree(elem_values);
1990  pfree(elem_nulls);
1991 
1992  return buf.data;
1993  }
1994  else
1995  {
1996  Oid typeOut;
1997  bool isvarlena;
1998  char *str;
1999 
2000  /*
2001  * Flatten domains; the special-case treatments below should apply to,
2002  * eg, domains over boolean not just boolean.
2003  */
2004  type = getBaseType(type);
2005 
2006  /*
2007  * Special XSD formatting for some data types
2008  */
2009  switch (type)
2010  {
2011  case BOOLOID:
2012  if (DatumGetBool(value))
2013  return "true";
2014  else
2015  return "false";
2016 
2017  case DATEOID:
2018  {
2019  DateADT date;
2020  struct pg_tm tm;
2021  char buf[MAXDATELEN + 1];
2022 
2023  date = DatumGetDateADT(value);
2024  /* XSD doesn't support infinite values */
2025  if (DATE_NOT_FINITE(date))
2026  ereport(ERROR,
2027  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2028  errmsg("date out of range"),
2029  errdetail("XML does not support infinite date values.")));
2031  &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2032  EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2033 
2034  return pstrdup(buf);
2035  }
2036 
2037  case TIMESTAMPOID:
2038  {
2040  struct pg_tm tm;
2041  fsec_t fsec;
2042  char buf[MAXDATELEN + 1];
2043 
2044  timestamp = DatumGetTimestamp(value);
2045 
2046  /* XSD doesn't support infinite values */
2047  if (TIMESTAMP_NOT_FINITE(timestamp))
2048  ereport(ERROR,
2049  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2050  errmsg("timestamp out of range"),
2051  errdetail("XML does not support infinite timestamp values.")));
2052  else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2053  EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2054  else
2055  ereport(ERROR,
2056  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2057  errmsg("timestamp out of range")));
2058 
2059  return pstrdup(buf);
2060  }
2061 
2062  case TIMESTAMPTZOID:
2063  {
2065  struct pg_tm tm;
2066  int tz;
2067  fsec_t fsec;
2068  const char *tzn = NULL;
2069  char buf[MAXDATELEN + 1];
2070 
2071  timestamp = DatumGetTimestamp(value);
2072 
2073  /* XSD doesn't support infinite values */
2074  if (TIMESTAMP_NOT_FINITE(timestamp))
2075  ereport(ERROR,
2076  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2077  errmsg("timestamp out of range"),
2078  errdetail("XML does not support infinite timestamp values.")));
2079  else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2080  EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2081  else
2082  ereport(ERROR,
2083  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2084  errmsg("timestamp out of range")));
2085 
2086  return pstrdup(buf);
2087  }
2088 
2089 #ifdef USE_LIBXML
2090  case BYTEAOID:
2091  {
2092  bytea *bstr = DatumGetByteaPP(value);
2093  PgXmlErrorContext *xmlerrcxt;
2094  volatile xmlBufferPtr buf = NULL;
2095  volatile xmlTextWriterPtr writer = NULL;
2096  char *result;
2097 
2098  xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2099 
2100  PG_TRY();
2101  {
2102  buf = xmlBufferCreate();
2103  if (buf == NULL || xmlerrcxt->err_occurred)
2104  xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2105  "could not allocate xmlBuffer");
2106  writer = xmlNewTextWriterMemory(buf, 0);
2107  if (writer == NULL || xmlerrcxt->err_occurred)
2108  xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2109  "could not allocate xmlTextWriter");
2110 
2111  if (xmlbinary == XMLBINARY_BASE64)
2112  xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2113  0, VARSIZE_ANY_EXHDR(bstr));
2114  else
2115  xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2116  0, VARSIZE_ANY_EXHDR(bstr));
2117 
2118  /* we MUST do this now to flush data out to the buffer */
2119  xmlFreeTextWriter(writer);
2120  writer = NULL;
2121 
2122  result = pstrdup((const char *) xmlBufferContent(buf));
2123  }
2124  PG_CATCH();
2125  {
2126  if (writer)
2127  xmlFreeTextWriter(writer);
2128  if (buf)
2129  xmlBufferFree(buf);
2130 
2131  pg_xml_done(xmlerrcxt, true);
2132 
2133  PG_RE_THROW();
2134  }
2135  PG_END_TRY();
2136 
2137  xmlBufferFree(buf);
2138 
2139  pg_xml_done(xmlerrcxt, false);
2140 
2141  return result;
2142  }
2143 #endif /* USE_LIBXML */
2144 
2145  }
2146 
2147  /*
2148  * otherwise, just use the type's native text representation
2149  */
2150  getTypeOutputInfo(type, &typeOut, &isvarlena);
2151  str = OidOutputFunctionCall(typeOut, value);
2152 
2153  /* ... exactly as-is for XML, and when escaping is not wanted */
2154  if (type == XMLOID || !xml_escape_strings)
2155  return str;
2156 
2157  /* otherwise, translate special characters as needed */
2158  return escape_xml(str);
2159  }
2160 }
2161 
2162 
2163 /*
2164  * Escape characters in text that have special meanings in XML.
2165  *
2166  * Returns a palloc'd string.
2167  *
2168  * NB: this is intentionally not dependent on libxml.
2169  */
2170 char *
2171 escape_xml(const char *str)
2172 {
2174  const char *p;
2175 
2176  initStringInfo(&buf);
2177  for (p = str; *p; p++)
2178  {
2179  switch (*p)
2180  {
2181  case '&':
2182  appendStringInfoString(&buf, "&amp;");
2183  break;
2184  case '<':
2185  appendStringInfoString(&buf, "&lt;");
2186  break;
2187  case '>':
2188  appendStringInfoString(&buf, "&gt;");
2189  break;
2190  case '\r':
2191  appendStringInfoString(&buf, "&#x0d;");
2192  break;
2193  default:
2194  appendStringInfoCharMacro(&buf, *p);
2195  break;
2196  }
2197  }
2198  return buf.data;
2199 }
2200 
2201 
/* Copy a C string, terminator included, into memory from SPI_palloc. */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes;
	char	   *copy;

	nbytes = strlen(s) + 1;		/* count the trailing zero too */
	copy = SPI_palloc(nbytes);
	memcpy(copy, s, nbytes);

	return copy;
}
2211 
2212 
2213 /*
2214  * SQL to XML mapping functions
2215  *
2216  * What follows below was at one point intentionally organized so that
2217  * you can read along in the SQL/XML standard. The functions are
2218  * mostly split up the way the clauses lay out in the standards
2219  * document, and the identifiers are also aligned with the standard
2220  * text. Unfortunately, SQL/XML:2006 reordered the clauses
2221  * differently than SQL/XML:2003, so the order below doesn't make much
2222  * sense anymore.
2223  *
2224  * There are many things going on there:
2225  *
2226  * There are two kinds of mappings: Mapping SQL data (table contents)
2227  * to XML documents, and mapping SQL structure (the "schema") to XML
2228  * Schema. And there are functions that do both at the same time.
2229  *
2230  * Then you can map a database, a schema, or a table, each in both
2231  * ways. This breaks down recursively: Mapping a database invokes
2232  * mapping schemas, which invokes mapping tables, which invokes
2233  * mapping rows, which invokes mapping columns, although you can't
2234  * call the last two from the outside. Because of this, there are a
2235  * number of xyz_internal() functions which are to be called both from
2236  * the function manager wrapper and from some upper layer in a
2237  * recursive call.
2238  *
2239  * See the documentation about what the common function arguments
2240  * nulls, tableforest, and targetns mean.
2241  *
2242  * Some style guidelines for XML output: Use double quotes for quoting
2243  * XML attributes. Indent XML elements by two spaces, but remember
2244  * that a lot of code is called recursively at different levels, so
2245  * it's better not to indent rather than create output that indents
2246  * and outdents weirdly. Add newlines to make the output look nice.
2247  */
2248 
2249 
2250 /*
2251  * Visibility of objects for XML mappings; see SQL/XML:2008 section
2252  * 4.10.8.
2253  */
2254 
2255 /*
2256  * Given a query, which must return type oid as first column, produce
2257  * a list of Oids with the query results.
2258  */
2259 static List *
2260 query_to_oid_list(const char *query)
2261 {
2262  int i;
2263  List *list = NIL;
2264 
2265  SPI_execute(query, true, 0);
2266 
2267  for (i = 0; i < SPI_processed; i++)
2268  {
2269  Datum oid;
2270  bool isnull;
2271 
2272  oid = SPI_getbinval(SPI_tuptable->vals[i],
2274  1,
2275  &isnull);
2276  if (!isnull)
2277  list = lappend_oid(list, DatumGetObjectId(oid));
2278  }
2279 
2280  return list;
2281 }
2282 
2283 
2284 static List *
2286 {
2287  StringInfoData query;
2288 
2289  initStringInfo(&query);
2290  appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class WHERE relnamespace = %u AND relkind IN ('r', 'm', 'v') AND pg_catalog.has_table_privilege (oid, 'SELECT') ORDER BY relname;", nspid);
2291 
2292  return query_to_oid_list(query.data);
2293 }
2294 
2295 
2296 /*
2297  * Including the system schemas is probably not useful for a database
2298  * mapping.
2299  */
2300 #define XML_VISIBLE_SCHEMAS_EXCLUDE "(nspname ~ '^pg_' OR nspname = 'information_schema')"
2301 
2302 #define XML_VISIBLE_SCHEMAS "SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2303 
2304 
2305 static List *
2307 {
2308  return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2309 }
2310 
2311 
2312 static List *
2314 {
2315  /* At the moment there is no order required here. */
2316  return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class WHERE relkind IN ('r', 'm', 'v') AND pg_catalog.has_table_privilege (pg_class.oid, 'SELECT') AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2317 }
2318 
2319 
2320 /*
2321  * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2322  * section 9.11.
2323  */
2324 
2325 static StringInfo
2327  const char *xmlschema, bool nulls, bool tableforest,
2328  const char *targetns, bool top_level)
2329 {
2330  StringInfoData query;
2331 
2332  initStringInfo(&query);
2333  appendStringInfo(&query, "SELECT * FROM %s",
2335  ObjectIdGetDatum(relid))));
2336  return query_to_xml_internal(query.data, get_rel_name(relid),
2337  xmlschema, nulls, tableforest,
2338  targetns, top_level);
2339 }
2340 
2341 
2342 Datum
2344 {
2345  Oid relid = PG_GETARG_OID(0);
2346  bool nulls = PG_GETARG_BOOL(1);
2347  bool tableforest = PG_GETARG_BOOL(2);
2348  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2349 
2351  nulls, tableforest,
2352  targetns, true)));
2353 }
2354 
2355 
2356 Datum
2358 {
2359  char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2360  bool nulls = PG_GETARG_BOOL(1);
2361  bool tableforest = PG_GETARG_BOOL(2);
2362  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2363 
2365  NULL, nulls, tableforest,
2366  targetns, true)));
2367 }
2368 
2369 
2370 Datum
2372 {
2373  char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2374  int32 count = PG_GETARG_INT32(1);
2375  bool nulls = PG_GETARG_BOOL(2);
2376  bool tableforest = PG_GETARG_BOOL(3);
2377  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2378 
2379  StringInfoData result;
2380  Portal portal;
2381  int i;
2382 
2383  initStringInfo(&result);
2384 
2385  SPI_connect();
2386  portal = SPI_cursor_find(name);
2387  if (portal == NULL)
2388  ereport(ERROR,
2389  (errcode(ERRCODE_UNDEFINED_CURSOR),
2390  errmsg("cursor \"%s\" does not exist", name)));
2391 
2392  SPI_cursor_fetch(portal, true, count);
2393  for (i = 0; i < SPI_processed; i++)
2394  SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2395  tableforest, targetns, true);
2396 
2397  SPI_finish();
2398 
2400 }
2401 
2402 
2403 /*
2404  * Write the start tag of the root element of a data mapping.
2405  *
2406  * top_level means that this is the very top level of the eventual
2407  * output. For example, when the user calls table_to_xml, then a call
2408  * with a table name to this function is the top level. When the user
2409  * calls database_to_xml, then a call with a schema name to this
2410  * function is not the top level. If top_level is false, then the XML
2411  * namespace declarations are omitted, because they supposedly already
2412  * appeared earlier in the output. Repeating them is not wrong, but
2413  * it looks ugly.
2414  */
2415 static void
2416 xmldata_root_element_start(StringInfo result, const char *eltname,
2417  const char *xmlschema, const char *targetns,
2418  bool top_level)
2419 {
2420  /* This isn't really wrong but currently makes no sense. */
2421  Assert(top_level || !xmlschema);
2422 
2423  appendStringInfo(result, "<%s", eltname);
2424  if (top_level)
2425  {
2426  appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2427  if (strlen(targetns) > 0)
2428  appendStringInfo(result, " xmlns=\"%s\"", targetns);
2429  }
2430  if (xmlschema)
2431  {
2432  /* FIXME: better targets */
2433  if (strlen(targetns) > 0)
2434  appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2435  else
2436  appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2437  }
2438  appendStringInfoString(result, ">\n");
2439 }
2440 
2441 
2442 static void
2443 xmldata_root_element_end(StringInfo result, const char *eltname)
2444 {
2445  appendStringInfo(result, "</%s>\n", eltname);
2446 }
2447 
2448 
2449 static StringInfo
2450 query_to_xml_internal(const char *query, char *tablename,
2451  const char *xmlschema, bool nulls, bool tableforest,
2452  const char *targetns, bool top_level)
2453 {
2454  StringInfo result;
2455  char *xmltn;
2456  int i;
2457 
2458  if (tablename)
2459  xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2460  else
2461  xmltn = "table";
2462 
2463  result = makeStringInfo();
2464 
2465  SPI_connect();
2466  if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2467  ereport(ERROR,
2468  (errcode(ERRCODE_DATA_EXCEPTION),
2469  errmsg("invalid query")));
2470 
2471  if (!tableforest)
2472  {
2473  xmldata_root_element_start(result, xmltn, xmlschema,
2474  targetns, top_level);
2475  appendStringInfoString(result, "\n");
2476  }
2477 
2478  if (xmlschema)
2479  appendStringInfo(result, "%s\n\n", xmlschema);
2480 
2481  for (i = 0; i < SPI_processed; i++)
2482  SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2483  tableforest, targetns, top_level);
2484 
2485  if (!tableforest)
2486  xmldata_root_element_end(result, xmltn);
2487 
2488  SPI_finish();
2489 
2490  return result;
2491 }
2492 
2493 
2494 Datum
2496 {
2497  Oid relid = PG_GETARG_OID(0);
2498  bool nulls = PG_GETARG_BOOL(1);
2499  bool tableforest = PG_GETARG_BOOL(2);
2500  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2501  const char *result;
2502  Relation rel;
2503 
2504  rel = heap_open(relid, AccessShareLock);
2505  result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2506  tableforest, targetns);
2507  heap_close(rel, NoLock);
2508 
2510 }
2511 
2512 
2513 Datum
2515 {
2516  char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2517  bool nulls = PG_GETARG_BOOL(1);
2518  bool tableforest = PG_GETARG_BOOL(2);
2519  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2520  const char *result;
2521  SPIPlanPtr plan;
2522  Portal portal;
2523 
2524  SPI_connect();
2525 
2526  if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2527  elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2528 
2529  if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2530  elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2531 
2533  InvalidOid, nulls,
2534  tableforest, targetns));
2535  SPI_cursor_close(portal);
2536  SPI_finish();
2537 
2539 }
2540 
2541 
2542 Datum
2544 {
2545  char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2546  bool nulls = PG_GETARG_BOOL(1);
2547  bool tableforest = PG_GETARG_BOOL(2);
2548  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2549  const char *xmlschema;
2550  Portal portal;
2551 
2552  SPI_connect();
2553  portal = SPI_cursor_find(name);
2554  if (portal == NULL)
2555  ereport(ERROR,
2556  (errcode(ERRCODE_UNDEFINED_CURSOR),
2557  errmsg("cursor \"%s\" does not exist", name)));
2558 
2559  xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2560  InvalidOid, nulls,
2561  tableforest, targetns));
2562  SPI_finish();
2563 
2564  PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
2565 }
2566 
2567 
2568 Datum
2570 {
2571  Oid relid = PG_GETARG_OID(0);
2572  bool nulls = PG_GETARG_BOOL(1);
2573  bool tableforest = PG_GETARG_BOOL(2);
2574  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2575  Relation rel;
2576  const char *xmlschema;
2577 
2578  rel = heap_open(relid, AccessShareLock);
2579  xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2580  tableforest, targetns);
2581  heap_close(rel, NoLock);
2582 
2584  xmlschema, nulls, tableforest,
2585  targetns, true)));
2586 }
2587 
2588 
2589 Datum
2591 {
2592  char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2593  bool nulls = PG_GETARG_BOOL(1);
2594  bool tableforest = PG_GETARG_BOOL(2);
2595  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2596 
2597  const char *xmlschema;
2598  SPIPlanPtr plan;
2599  Portal portal;
2600 
2601  SPI_connect();
2602 
2603  if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2604  elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2605 
2606  if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2607  elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2608 
2609  xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2610  InvalidOid, nulls, tableforest, targetns));
2611  SPI_cursor_close(portal);
2612  SPI_finish();
2613 
2615  xmlschema, nulls, tableforest,
2616  targetns, true)));
2617 }
2618 
2619 
2620 /*
2621  * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
2622  * sections 9.13, 9.14.
2623  */
2624 
2625 static StringInfo
2626 schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
2627  bool tableforest, const char *targetns, bool top_level)
2628 {
2629  StringInfo result;
2630  char *xmlsn;
2631  List *relid_list;
2632  ListCell *cell;
2633 
2635  true, false);
2636  result = makeStringInfo();
2637 
2638  xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2639  appendStringInfoString(result, "\n");
2640 
2641  if (xmlschema)
2642  appendStringInfo(result, "%s\n\n", xmlschema);
2643 
2644  SPI_connect();
2645 
2646  relid_list = schema_get_xml_visible_tables(nspid);
2647 
2648  SPI_push();
2649 
2650  foreach(cell, relid_list)
2651  {
2652  Oid relid = lfirst_oid(cell);
2653  StringInfo subres;
2654 
2655  subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
2656  targetns, false);
2657 
2658  appendStringInfoString(result, subres->data);
2659  appendStringInfoChar(result, '\n');
2660  }
2661 
2662  SPI_pop();
2663  SPI_finish();
2664 
2665  xmldata_root_element_end(result, xmlsn);
2666 
2667  return result;
2668 }
2669 
2670 
2671 Datum
2673 {
2674  Name name = PG_GETARG_NAME(0);
2675  bool nulls = PG_GETARG_BOOL(1);
2676  bool tableforest = PG_GETARG_BOOL(2);
2677  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2678 
2679  char *schemaname;
2680  Oid nspid;
2681 
2682  schemaname = NameStr(*name);
2683  nspid = LookupExplicitNamespace(schemaname, false);
2684 
2686  nulls, tableforest, targetns, true)));
2687 }
2688 
2689 
2690 /*
2691  * Write the start element of the root element of an XML Schema mapping.
2692  */
2693 static void
2694 xsd_schema_element_start(StringInfo result, const char *targetns)
2695 {
2696  appendStringInfoString(result,
2697  "<xsd:schema\n"
2698  " xmlns:xsd=\"" NAMESPACE_XSD "\"");
2699  if (strlen(targetns) > 0)
2700  appendStringInfo(result,
2701  "\n"
2702  " targetNamespace=\"%s\"\n"
2703  " elementFormDefault=\"qualified\"",
2704  targetns);
2705  appendStringInfoString(result,
2706  ">\n\n");
2707 }
2708 
2709 
2710 static void
2712 {
2713  appendStringInfoString(result, "</xsd:schema>");
2714 }
2715 
2716 
2717 static StringInfo
2718 schema_to_xmlschema_internal(const char *schemaname, bool nulls,
2719  bool tableforest, const char *targetns)
2720 {
2721  Oid nspid;
2722  List *relid_list;
2723  List *tupdesc_list;
2724  ListCell *cell;
2725  StringInfo result;
2726 
2727  result = makeStringInfo();
2728 
2729  nspid = LookupExplicitNamespace(schemaname, false);
2730 
2731  xsd_schema_element_start(result, targetns);
2732 
2733  SPI_connect();
2734 
2735  relid_list = schema_get_xml_visible_tables(nspid);
2736 
2737  tupdesc_list = NIL;
2738  foreach(cell, relid_list)
2739  {
2740  Relation rel;
2741 
2742  rel = heap_open(lfirst_oid(cell), AccessShareLock);
2743  tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2744  heap_close(rel, NoLock);
2745  }
2746 
2747  appendStringInfoString(result,
2748  map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2749 
2750  appendStringInfoString(result,
2751  map_sql_schema_to_xmlschema_types(nspid, relid_list,
2752  nulls, tableforest, targetns));
2753 
2754  xsd_schema_element_end(result);
2755 
2756  SPI_finish();
2757 
2758  return result;
2759 }
2760 
2761 
2762 Datum
2764 {
2765  Name name = PG_GETARG_NAME(0);
2766  bool nulls = PG_GETARG_BOOL(1);
2767  bool tableforest = PG_GETARG_BOOL(2);
2768  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2769 
2771  nulls, tableforest, targetns)));
2772 }
2773 
2774 
2775 Datum
2777 {
2778  Name name = PG_GETARG_NAME(0);
2779  bool nulls = PG_GETARG_BOOL(1);
2780  bool tableforest = PG_GETARG_BOOL(2);
2781  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2782  char *schemaname;
2783  Oid nspid;
2784  StringInfo xmlschema;
2785 
2786  schemaname = NameStr(*name);
2787  nspid = LookupExplicitNamespace(schemaname, false);
2788 
2789  xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
2790  tableforest, targetns);
2791 
2793  xmlschema->data, nulls,
2794  tableforest, targetns, true)));
2795 }
2796 
2797 
2798 /*
2799  * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
2800  * sections 9.16, 9.17.
2801  */
2802 
2803 static StringInfo
2804 database_to_xml_internal(const char *xmlschema, bool nulls,
2805  bool tableforest, const char *targetns)
2806 {
2807  StringInfo result;
2808  List *nspid_list;
2809  ListCell *cell;
2810  char *xmlcn;
2811 
2813  true, false);
2814  result = makeStringInfo();
2815 
2816  xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
2817  appendStringInfoString(result, "\n");
2818 
2819  if (xmlschema)
2820  appendStringInfo(result, "%s\n\n", xmlschema);
2821 
2822  SPI_connect();
2823 
2824  nspid_list = database_get_xml_visible_schemas();
2825 
2826  SPI_push();
2827 
2828  foreach(cell, nspid_list)
2829  {
2830  Oid nspid = lfirst_oid(cell);
2831  StringInfo subres;
2832 
2833  subres = schema_to_xml_internal(nspid, NULL, nulls,
2834  tableforest, targetns, false);
2835 
2836  appendStringInfoString(result, subres->data);
2837  appendStringInfoChar(result, '\n');
2838  }
2839 
2840  SPI_pop();
2841  SPI_finish();
2842 
2843  xmldata_root_element_end(result, xmlcn);
2844 
2845  return result;
2846 }
2847 
2848 
2849 Datum
2851 {
2852  bool nulls = PG_GETARG_BOOL(0);
2853  bool tableforest = PG_GETARG_BOOL(1);
2854  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2855 
2857  tableforest, targetns)));
2858 }
2859 
2860 
2861 static StringInfo
2862 database_to_xmlschema_internal(bool nulls, bool tableforest,
2863  const char *targetns)
2864 {
2865  List *relid_list;
2866  List *nspid_list;
2867  List *tupdesc_list;
2868  ListCell *cell;
2869  StringInfo result;
2870 
2871  result = makeStringInfo();
2872 
2873  xsd_schema_element_start(result, targetns);
2874 
2875  SPI_connect();
2876 
2877  relid_list = database_get_xml_visible_tables();
2878  nspid_list = database_get_xml_visible_schemas();
2879 
2880  tupdesc_list = NIL;
2881  foreach(cell, relid_list)
2882  {
2883  Relation rel;
2884 
2885  rel = heap_open(lfirst_oid(cell), AccessShareLock);
2886  tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2887  heap_close(rel, NoLock);
2888  }
2889 
2890  appendStringInfoString(result,
2891  map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2892 
2893  appendStringInfoString(result,
2894  map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
2895 
2896  xsd_schema_element_end(result);
2897 
2898  SPI_finish();
2899 
2900  return result;
2901 }
2902 
2903 
2904 Datum
2906 {
2907  bool nulls = PG_GETARG_BOOL(0);
2908  bool tableforest = PG_GETARG_BOOL(1);
2909  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2910 
2912  tableforest, targetns)));
2913 }
2914 
2915 
2916 Datum
2918 {
2919  bool nulls = PG_GETARG_BOOL(0);
2920  bool tableforest = PG_GETARG_BOOL(1);
2921  const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2922  StringInfo xmlschema;
2923 
2924  xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
2925 
2927  nulls, tableforest, targetns)));
2928 }
2929 
2930 
2931 /*
2932  * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
2933  * 9.2.
2934  */
2935 static char *
2936 map_multipart_sql_identifier_to_xml_name(char *a, char *b, char *c, char *d)
2937 {
2938  StringInfoData result;
2939 
2940  initStringInfo(&result);
2941 
2942  if (a)
2943  appendStringInfoString(&result,
2944  map_sql_identifier_to_xml_name(a, true, true));
2945  if (b)
2946  appendStringInfo(&result, ".%s",
2947  map_sql_identifier_to_xml_name(b, true, true));
2948  if (c)
2949  appendStringInfo(&result, ".%s",
2950  map_sql_identifier_to_xml_name(c, true, true));
2951  if (d)
2952  appendStringInfo(&result, ".%s",
2953  map_sql_identifier_to_xml_name(d, true, true));
2954 
2955  return result.data;
2956 }
2957 
2958 
2959 /*
2960  * Map an SQL table to an XML Schema document; see SQL/XML:2008
2961  * section 9.11.
2962  *
2963  * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
2964  * 9.9.
2965  */
2966 static const char *
2967 map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
2968  bool tableforest, const char *targetns)
2969 {
2970  int i;
2971  char *xmltn;
2972  char *tabletypename;
2973  char *rowtypename;
2974  StringInfoData result;
2975 
2976  initStringInfo(&result);
2977 
2978  if (OidIsValid(relid))
2979  {
2980  HeapTuple tuple;
2981  Form_pg_class reltuple;
2982 
2983  tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
2984  if (!HeapTupleIsValid(tuple))
2985  elog(ERROR, "cache lookup failed for relation %u", relid);
2986  reltuple = (Form_pg_class) GETSTRUCT(tuple);
2987 
2988  xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
2989  true, false);
2990 
2991  tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
2993  get_namespace_name(reltuple->relnamespace),
2994  NameStr(reltuple->relname));
2995 
2996  rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
2998  get_namespace_name(reltuple->relnamespace),
2999  NameStr(reltuple->relname));
3000 
3001  ReleaseSysCache(tuple);
3002  }
3003  else
3004  {
3005  if (tableforest)
3006  xmltn = "row";
3007  else
3008  xmltn = "table";
3009 
3010  tabletypename = "TableType";
3011  rowtypename = "RowType";
3012  }
3013 
3014  xsd_schema_element_start(&result, targetns);
3015 
3016  appendStringInfoString(&result,
3018 
3019  appendStringInfo(&result,
3020  "<xsd:complexType name=\"%s\">\n"
3021  " <xsd:sequence>\n",
3022  rowtypename);
3023 
3024  for (i = 0; i < tupdesc->natts; i++)
3025  {
3026  if (tupdesc->attrs[i]->attisdropped)
3027  continue;
3028  appendStringInfo(&result,
3029  " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3030  map_sql_identifier_to_xml_name(NameStr(tupdesc->attrs[i]->attname),
3031  true, false),
3032  map_sql_type_to_xml_name(tupdesc->attrs[i]->atttypid, -1),
3033  nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3034  }
3035 
3036  appendStringInfoString(&result,
3037  " </xsd:sequence>\n"
3038  "</xsd:complexType>\n\n");
3039 
3040  if (!tableforest)
3041  {
3042  appendStringInfo(&result,
3043  "<xsd:complexType name=\"%s\">\n"
3044  " <xsd:sequence>\n"
3045  " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3046  " </xsd:sequence>\n"
3047  "</xsd:complexType>\n\n",
3048  tabletypename, rowtypename);
3049 
3050  appendStringInfo(&result,
3051  "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3052  xmltn, tabletypename);
3053  }
3054  else
3055  appendStringInfo(&result,
3056  "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3057  xmltn, rowtypename);
3058 
3059  xsd_schema_element_end(&result);
3060 
3061  return result.data;
3062 }
3063 
3064 
3065 /*
3066  * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3067  * section 9.12.
3068  */
3069 static const char *
3070 map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3071  bool tableforest, const char *targetns)
3072 {
3073  char *dbname;
3074  char *nspname;
3075  char *xmlsn;
3076  char *schematypename;
3077  StringInfoData result;
3078  ListCell *cell;
3079 
3080  dbname = get_database_name(MyDatabaseId);
3081  nspname = get_namespace_name(nspid);
3082 
3083  initStringInfo(&result);
3084 
3085  xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3086 
3087  schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3088  dbname,
3089  nspname,
3090  NULL);
3091 
3092  appendStringInfo(&result,
3093  "<xsd:complexType name=\"%s\">\n", schematypename);
3094  if (!tableforest)
3095  appendStringInfoString(&result,
3096  " <xsd:all>\n");
3097  else
3098  appendStringInfoString(&result,
3099  " <xsd:sequence>\n");
3100 
3101  foreach(cell, relid_list)
3102  {
3103  Oid relid = lfirst_oid(cell);
3104  char *relname = get_rel_name(relid);
3105  char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3106  char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3107  dbname,
3108  nspname,
3109  relname);
3110 
3111  if (!tableforest)
3112  appendStringInfo(&result,
3113  " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3114  xmltn, tabletypename);
3115  else
3116  appendStringInfo(&result,
3117  " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3118  xmltn, tabletypename);
3119  }
3120 
3121  if (!tableforest)
3122  appendStringInfoString(&result,
3123  " </xsd:all>\n");
3124  else
3125  appendStringInfoString(&result,
3126  " </xsd:sequence>\n");
3127  appendStringInfoString(&result,
3128  "</xsd:complexType>\n\n");
3129 
3130  appendStringInfo(&result,
3131  "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3132  xmlsn, schematypename);
3133 
3134  return result.data;
3135 }
3136 
3137 
3138 /*
3139  * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3140  * section 9.15.
3141  */
3142 static const char *
3144  bool tableforest, const char *targetns)
3145 {
3146  char *dbname;
3147  char *xmlcn;
3148  char *catalogtypename;
3149  StringInfoData result;
3150  ListCell *cell;
3151 
3152  dbname = get_database_name(MyDatabaseId);
3153 
3154  initStringInfo(&result);
3155 
3156  xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3157 
3158  catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3159  dbname,
3160  NULL,
3161  NULL);
3162 
3163  appendStringInfo(&result,
3164  "<xsd:complexType name=\"%s\">\n", catalogtypename);
3165  appendStringInfoString(&result,
3166  " <xsd:all>\n");
3167 
3168  foreach(cell, nspid_list)
3169  {
3170  Oid nspid = lfirst_oid(cell);
3171  char *nspname = get_namespace_name(nspid);
3172  char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3173  char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3174  dbname,
3175  nspname,
3176  NULL);
3177 
3178  appendStringInfo(&result,
3179  " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3180  xmlsn, schematypename);
3181  }
3182 
3183  appendStringInfoString(&result,
3184  " </xsd:all>\n");
3185  appendStringInfoString(&result,
3186  "</xsd:complexType>\n\n");
3187 
3188  appendStringInfo(&result,
3189  "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3190  xmlcn, catalogtypename);
3191 
3192  return result.data;
3193 }
3194 
3195 
3196 /*
3197  * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3198  */
3199 static const char *
3200 map_sql_type_to_xml_name(Oid typeoid, int typmod)
3201 {
3202  StringInfoData result;
3203 
3204  initStringInfo(&result);
3205 
3206  switch (typeoid)
3207  {
3208  case BPCHAROID:
3209  if (typmod == -1)
3210  appendStringInfoString(&result, "CHAR");
3211  else
3212  appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3213  break;
3214  case VARCHAROID:
3215  if (typmod == -1)
3216  appendStringInfoString(&result, "VARCHAR");
3217  else
3218  appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3219  break;
3220  case NUMERICOID:
3221  if (typmod == -1)
3222  appendStringInfoString(&result, "NUMERIC");
3223  else
3224  appendStringInfo(&result, "NUMERIC_%d_%d",
3225  ((typmod - VARHDRSZ) >> 16) & 0xffff,
3226  (typmod - VARHDRSZ) & 0xffff);
3227  break;
3228  case INT4OID:
3229  appendStringInfoString(&result, "INTEGER");
3230  break;
3231  case INT2OID:
3232  appendStringInfoString(&result, "SMALLINT");
3233  break;
3234  case INT8OID:
3235  appendStringInfoString(&result, "BIGINT");
3236  break;
3237  case FLOAT4OID:
3238  appendStringInfoString(&result, "REAL");
3239  break;
3240  case FLOAT8OID:
3241  appendStringInfoString(&result, "DOUBLE");
3242  break;
3243  case BOOLOID:
3244  appendStringInfoString(&result, "BOOLEAN");
3245  break;
3246  case TIMEOID:
3247  if (typmod == -1)
3248  appendStringInfoString(&result, "TIME");
3249  else
3250  appendStringInfo(&result, "TIME_%d", typmod);
3251  break;
3252  case TIMETZOID:
3253  if (typmod == -1)
3254  appendStringInfoString(&result, "TIME_WTZ");
3255  else
3256  appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3257  break;
3258  case TIMESTAMPOID:
3259  if (typmod == -1)
3260  appendStringInfoString(&result, "TIMESTAMP");
3261  else
3262  appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3263  break;
3264  case TIMESTAMPTZOID:
3265  if (typmod == -1)
3266  appendStringInfoString(&result, "TIMESTAMP_WTZ");
3267  else
3268  appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3269  break;
3270  case DATEOID:
3271  appendStringInfoString(&result, "DATE");
3272  break;
3273  case XMLOID:
3274  appendStringInfoString(&result, "XML");
3275  break;
3276  default:
3277  {
3278  HeapTuple tuple;
3279  Form_pg_type typtuple;
3280 
3281  tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3282  if (!HeapTupleIsValid(tuple))
3283  elog(ERROR, "cache lookup failed for type %u", typeoid);
3284  typtuple = (Form_pg_type) GETSTRUCT(tuple);
3285 
3286  appendStringInfoString(&result,
3287  map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3289  get_namespace_name(typtuple->typnamespace),
3290  NameStr(typtuple->typname)));
3291 
3292  ReleaseSysCache(tuple);
3293  }
3294  }
3295 
3296  return result.data;
3297 }
3298 
3299 
3300 /*
3301  * Map a collection of SQL data types to XML Schema data types; see
3302  * SQL/XML:2008 section 9.7.
3303  */
3304 static const char *
3306 {
3307  List *uniquetypes = NIL;
3308  int i;
3309  StringInfoData result;
3310  ListCell *cell0;
3311 
3312  /* extract all column types used in the set of TupleDescs */
3313  foreach(cell0, tupdesc_list)
3314  {
3315  TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3316 
3317  for (i = 0; i < tupdesc->natts; i++)
3318  {
3319  if (tupdesc->attrs[i]->attisdropped)
3320  continue;
3321  uniquetypes = list_append_unique_oid(uniquetypes,
3322  tupdesc->attrs[i]->atttypid);
3323  }
3324  }
3325 
3326  /* add base types of domains */
3327  foreach(cell0, uniquetypes)
3328  {
3329  Oid typid = lfirst_oid(cell0);
3330  Oid basetypid = getBaseType(typid);
3331 
3332  if (basetypid != typid)
3333  uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3334  }
3335 
3336  /* Convert to textual form */
3337  initStringInfo(&result);
3338 
3339  foreach(cell0, uniquetypes)
3340  {
3341  appendStringInfo(&result, "%s\n",
3343  -1));
3344  }
3345 
3346  return result.data;
3347 }
3348 
3349 
3350 /*
3351  * Map an SQL data type to a named XML Schema data type; see
3352  * SQL/XML:2008 sections 9.5 and 9.6.
3353  *
3354  * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3355  * a name attribute, which this function does. The name-less version
3356  * 9.5 doesn't appear to be required anywhere.)
3357  */
3358 static const char *
3360 {
3361  StringInfoData result;
3362  const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3363 
3364  initStringInfo(&result);
3365 
3366  if (typeoid == XMLOID)
3367  {
3368  appendStringInfoString(&result,
3369  "<xsd:complexType mixed=\"true\">\n"
3370  " <xsd:sequence>\n"
3371  " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3372  " </xsd:sequence>\n"
3373  "</xsd:complexType>\n");
3374  }
3375  else
3376  {
3377  appendStringInfo(&result,
3378  "<xsd:simpleType name=\"%s\">\n", typename);
3379 
3380  switch (typeoid)
3381  {
3382  case BPCHAROID:
3383  case VARCHAROID:
3384  case TEXTOID:
3385  appendStringInfo(&result,
3386  " <xsd:restriction base=\"xsd:string\">\n");
3387  if (typmod != -1)
3388  appendStringInfo(&result,
3389  " <xsd:maxLength value=\"%d\"/>\n",
3390  typmod - VARHDRSZ);
3391  appendStringInfoString(&result, " </xsd:restriction>\n");
3392  break;
3393 
3394  case BYTEAOID:
3395  appendStringInfo(&result,
3396  " <xsd:restriction base=\"xsd:%s\">\n"
3397  " </xsd:restriction>\n",
3398  xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3399  break;
3400 
3401  case NUMERICOID:
3402  if (typmod != -1)
3403  appendStringInfo(&result,
3404  " <xsd:restriction base=\"xsd:decimal\">\n"
3405  " <xsd:totalDigits value=\"%d\"/>\n"
3406  " <xsd:fractionDigits value=\"%d\"/>\n"
3407  " </xsd:restriction>\n",
3408  ((typmod - VARHDRSZ) >> 16) & 0xffff,
3409  (typmod - VARHDRSZ) & 0xffff);
3410  break;
3411 
3412  case INT2OID:
3413  appendStringInfo(&result,
3414  " <xsd:restriction base=\"xsd:short\">\n"
3415  " <xsd:maxInclusive value=\"%d\"/>\n"
3416  " <xsd:minInclusive value=\"%d\"/>\n"
3417  " </xsd:restriction>\n",
3418  SHRT_MAX, SHRT_MIN);
3419  break;
3420 
3421  case INT4OID:
3422  appendStringInfo(&result,
3423  " <xsd:restriction base=\"xsd:int\">\n"
3424  " <xsd:maxInclusive value=\"%d\"/>\n"
3425  " <xsd:minInclusive value=\"%d\"/>\n"
3426  " </xsd:restriction>\n",
3427  INT_MAX, INT_MIN);
3428  break;
3429 
3430  case INT8OID:
3431  appendStringInfo(&result,
3432  " <xsd:restriction base=\"xsd:long\">\n"
3433  " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3434  " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3435  " </xsd:restriction>\n",
3436  (((uint64) 1) << (sizeof(int64) * 8 - 1)) - 1,
3437  (((uint64) 1) << (sizeof(int64) * 8 - 1)));
3438  break;
3439 
3440  case FLOAT4OID:
3441  appendStringInfoString(&result,
3442  " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3443  break;
3444 
3445  case FLOAT8OID:
3446  appendStringInfoString(&result,
3447  " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3448  break;
3449 
3450  case BOOLOID:
3451  appendStringInfoString(&result,
3452  " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3453  break;
3454 
3455  case TIMEOID:
3456  case TIMETZOID:
3457  {
3458  const char *tz = (typeoid == TIMETZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3459 
3460  if (typmod == -1)
3461  appendStringInfo(&result,
3462  " <xsd:restriction base=\"xsd:time\">\n"
3463  " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3464  " </xsd:restriction>\n", tz);
3465  else if (typmod == 0)
3466  appendStringInfo(&result,
3467  " <xsd:restriction base=\"xsd:time\">\n"
3468  " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3469  " </xsd:restriction>\n", tz);
3470  else
3471  appendStringInfo(&result,
3472  " <xsd:restriction base=\"xsd:time\">\n"
3473  " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3474  " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3475  break;
3476  }
3477 
3478  case TIMESTAMPOID:
3479  case TIMESTAMPTZOID:
3480  {
3481  const char *tz = (typeoid == TIMESTAMPTZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3482 
3483  if (typmod == -1)
3484  appendStringInfo(&result,
3485  " <xsd:restriction base=\"xsd:dateTime\">\n"
3486  " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3487  " </xsd:restriction>\n", tz);
3488  else if (typmod == 0)
3489  appendStringInfo(&result,
3490  " <xsd:restriction base=\"xsd:dateTime\">\n"
3491  " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3492  " </xsd:restriction>\n", tz);
3493  else
3494  appendStringInfo(&result,
3495  " <xsd:restriction base=\"xsd:dateTime\">\n"
3496  " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3497  " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3498  break;
3499  }
3500 
3501  case DATEOID:
3502  appendStringInfoString(&result,
3503  " <xsd:restriction base=\"xsd:date\">\n"
3504  " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3505  " </xsd:restriction>\n");
3506  break;
3507 
3508  default:
3509  if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
3510  {
3511  Oid base_typeoid;
3512  int32 base_typmod = -1;
3513 
3514  base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
3515 
3516  appendStringInfo(&result,
3517  " <xsd:restriction base=\"%s\"/>\n",
3518  map_sql_type_to_xml_name(base_typeoid, base_typmod));
3519  }
3520  break;
3521  }
3522  appendStringInfoString(&result, "</xsd:simpleType>\n");
3523  }
3524 
3525  return result.data;
3526 }
3527 
3528 
3529 /*
3530  * Map an SQL row to an XML element, taking the row from the active
3531  * SPI cursor. See also SQL/XML:2008 section 9.10.
3532  */
3533 static void
3534 SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename,
3535  bool nulls, bool tableforest,
3536  const char *targetns, bool top_level)
3537 {
3538  int i;
3539  char *xmltn;
3540 
3541  if (tablename)
3542  xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3543  else
3544  {
3545  if (tableforest)
3546  xmltn = "row";
3547  else
3548  xmltn = "table";
3549  }
3550 
3551  if (tableforest)
3552  xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
3553  else
3554  appendStringInfoString(result, "<row>\n");
3555 
3556  for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
3557  {
3558  char *colname;
3559  Datum colval;
3560  bool isnull;
3561 
3563  true, false);
3564  colval = SPI_getbinval(SPI_tuptable->vals[rownum],
3566  i,
3567  &isnull);
3568  if (isnull)
3569  {
3570  if (nulls)
3571  appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
3572  }
3573  else
3574  appendStringInfo(result, " <%s>%s</%s>\n",
3575  colname,
3577  SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
3578  colname);
3579  }
3580 
3581  if (tableforest)
3582  {
3583  xmldata_root_element_end(result, xmltn);
3584  appendStringInfoChar(result, '\n');
3585  }
3586  else
3587  appendStringInfoString(result, "</row>\n\n");
3588 }
3589 
3590 
3591 /*
3592  * XPath related functions
3593  */
3594 
3595 #ifdef USE_LIBXML
3596 
3597 /*
3598  * Convert XML node to text (dump subtree in case of element,
3599  * return value otherwise)
3600  */
3601 static text *
3602 xml_xmlnodetoxmltype(xmlNodePtr cur)
3603 {
3604  xmltype *result;
3605 
3606  if (cur->type == XML_ELEMENT_NODE)
3607  {
3608  xmlBufferPtr buf;
3609 
3610  buf = xmlBufferCreate();
3611  PG_TRY();
3612  {
3613  xmlNodeDump(buf, NULL, cur, 0, 1);
3614  result = xmlBuffer_to_xmltype(buf);
3615  }
3616  PG_CATCH();
3617  {
3618  xmlBufferFree(buf);
3619  PG_RE_THROW();
3620  }
3621  PG_END_TRY();
3622  xmlBufferFree(buf);
3623  }
3624  else
3625  {
3626  xmlChar *str;
3627 
3628  str = xmlXPathCastNodeToString(cur);
3629  PG_TRY();
3630  {
3631  /* Here we rely on XML having the same representation as TEXT */
3632  char *escaped = escape_xml((char *) str);
3633 
3634  result = (xmltype *) cstring_to_text(escaped);
3635  pfree(escaped);
3636  }
3637  PG_CATCH();
3638  {
3639  xmlFree(str);
3640  PG_RE_THROW();
3641  }
3642  PG_END_TRY();
3643  xmlFree(str);
3644  }
3645 
3646  return result;
3647 }
3648 
3649 /*
3650  * Convert an XML XPath object (the result of evaluating an XPath expression)
3651  * to an array of xml values, which is returned at *astate. The function
3652  * result value is the number of elements in the array.
3653  *
3654  * If "astate" is NULL then we don't generate the array value, but we still
3655  * return the number of elements it would have had.
3656  *
3657  * Nodesets are converted to an array containing the nodes' textual
3658  * representations. Primitive values (float, double, string) are converted
3659  * to a single-element array containing the value's string representation.
3660  */
3661 static int
3662 xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
3663  ArrayBuildState **astate)
3664 {
3665  int result = 0;
3666  Datum datum;
3667  Oid datumtype;
3668  char *result_str;
3669 
3670  if (astate != NULL)
3671  *astate = NULL;
3672 
3673  switch (xpathobj->type)
3674  {
3675  case XPATH_NODESET:
3676  if (xpathobj->nodesetval != NULL)
3677  {
3678  result = xpathobj->nodesetval->nodeNr;
3679  if (astate != NULL)
3680  {
3681  int i;
3682 
3683  for (i = 0; i < result; i++)
3684  {
3685  datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i]));
3686  *astate = accumArrayResult(*astate, datum,
3687  false, XMLOID,
3689  }
3690  }
3691  }
3692  return result;
3693 
3694  case XPATH_BOOLEAN:
3695  if (astate == NULL)
3696  return 1;
3697  datum = BoolGetDatum(xpathobj->boolval);
3698  datumtype = BOOLOID;
3699  break;
3700 
3701  case XPATH_NUMBER:
3702  if (astate == NULL)
3703  return 1;
3704  datum = Float8GetDatum(xpathobj->floatval);
3705  datumtype = FLOAT8OID;
3706  break;
3707 
3708  case XPATH_STRING:
3709  if (astate == NULL)
3710  return 1;
3711  datum = CStringGetDatum((char *) xpathobj->stringval);
3712  datumtype = CSTRINGOID;
3713  break;
3714 
3715  default:
3716  elog(ERROR, "xpath expression result type %d is unsupported",
3717  xpathobj->type);
3718  return 0; /* keep compiler quiet */
3719  }
3720 
3721  /* Common code for scalar-value cases */
3722  result_str = map_sql_value_to_xml_value(datum, datumtype, true);
3723  datum = PointerGetDatum(cstring_to_xmltype(result_str));
3724  *astate = accumArrayResult(*astate, datum,
3725  false, XMLOID,
3727  return 1;
3728 }
3729 
3730 
3731 /*
3732  * Common code for xpath() and xmlexists()
3733  *
3734  * Evaluate XPath expression and return number of nodes in res_items
3735  * and array of XML values in astate. Either of those pointers can be
3736  * NULL if the corresponding result isn't wanted.
3737  *
3738  * It is up to the user to ensure that the XML passed is in fact
3739  * an XML document - XPath doesn't work easily on fragments without
3740  * a context node being known.
3741  */
3742 static void
3743 xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
3744  int *res_nitems, ArrayBuildState **astate)
3745 {
3746  PgXmlErrorContext *xmlerrcxt;
3747  volatile xmlParserCtxtPtr ctxt = NULL;
3748  volatile xmlDocPtr doc = NULL;
3749  volatile xmlXPathContextPtr xpathctx = NULL;
3750  volatile xmlXPathCompExprPtr xpathcomp = NULL;
3751  volatile xmlXPathObjectPtr xpathobj = NULL;
3752  char *datastr;
3753  int32 len;
3754  int32 xpath_len;
3755  xmlChar *string;
3756  xmlChar *xpath_expr;
3757  int i;
3758  int ndim;
3759  Datum *ns_names_uris;
3760  bool *ns_names_uris_nulls;
3761  int ns_count;
3762 
3763  /*
3764  * Namespace mappings are passed as text[]. If an empty array is passed
3765  * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
3766  * Else, a 2-dimensional array with length of the second axis being equal
3767  * to 2 should be passed, i.e., every subarray contains 2 elements, the
3768  * first element defining the name, the second one the URI. Example:
3769  * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
3770  * 'http://example2.com']].
3771  */
3772  ndim = namespaces ? ARR_NDIM(namespaces) : 0;
3773  if (ndim != 0)
3774  {
3775  int *dims;
3776 
3777  dims = ARR_DIMS(namespaces);
3778 
3779  if (ndim != 2 || dims[1] != 2)
3780  ereport(ERROR,
3781  (errcode(ERRCODE_DATA_EXCEPTION),
3782  errmsg("invalid array for XML namespace mapping"),
3783  errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
3784 
3785  Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
3786 
3787  deconstruct_array(namespaces, TEXTOID, -1, false, 'i',
3788  &ns_names_uris, &ns_names_uris_nulls,
3789  &ns_count);
3790 
3791  Assert((ns_count % 2) == 0); /* checked above */
3792  ns_count /= 2; /* count pairs only */
3793  }
3794  else
3795  {
3796  ns_names_uris = NULL;
3797  ns_names_uris_nulls = NULL;
3798  ns_count = 0;
3799  }
3800 
3801  datastr = VARDATA(data);
3802  len = VARSIZE(data) - VARHDRSZ;
3803  xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ;
3804  if (xpath_len == 0)
3805  ereport(ERROR,
3806  (errcode(ERRCODE_DATA_EXCEPTION),
3807  errmsg("empty XPath expression")));
3808 
3809  string = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
3810  memcpy(string, datastr, len);
3811  string[len] = '\0';
3812 
3813  xpath_expr = (xmlChar *) palloc((xpath_len + 1) * sizeof(xmlChar));
3814  memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
3815  xpath_expr[xpath_len] = '\0';
3816 
3817  xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
3818 
3819  PG_TRY();
3820  {
3821  xmlInitParser();
3822 
3823  /*
3824  * redundant XML parsing (two parsings for the same value during one
3825  * command execution are possible)
3826  */
3827  ctxt = xmlNewParserCtxt();
3828  if (ctxt == NULL || xmlerrcxt->err_occurred)
3829  xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3830  "could not allocate parser context");
3831  doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
3832  if (doc == NULL || xmlerrcxt->err_occurred)
3833  xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
3834  "could not parse XML document");
3835  xpathctx = xmlXPathNewContext(doc);
3836  if (xpathctx == NULL || xmlerrcxt->err_occurred)
3837  xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3838  "could not allocate XPath context");
3839  xpathctx->node = xmlDocGetRootElement(doc);
3840  if (xpathctx->node == NULL || xmlerrcxt->err_occurred)
3841  xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3842  "could not find root XML element");
3843 
3844  /* register namespaces, if any */
3845  if (ns_count > 0)
3846  {
3847  for (i = 0; i < ns_count; i++)
3848  {
3849  char *ns_name;
3850  char *ns_uri;
3851 
3852  if (ns_names_uris_nulls[i * 2] ||
3853  ns_names_uris_nulls[i * 2 + 1])
3854  ereport(ERROR,
3855  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
3856  errmsg("neither namespace name nor URI may be null")));
3857  ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
3858  ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
3859  if (xmlXPathRegisterNs(xpathctx,
3860  (xmlChar *) ns_name,
3861  (xmlChar *) ns_uri) != 0)
3862  ereport(ERROR, /* is this an internal error??? */
3863  (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
3864  ns_name, ns_uri)));
3865  }
3866  }
3867 
3868  xpathcomp = xmlXPathCompile(xpath_expr);
3869  if (xpathcomp == NULL || xmlerrcxt->err_occurred)
3870  xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3871  "invalid XPath expression");
3872 
3873  /*
3874  * Version 2.6.27 introduces a function named
3875  * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
3876  * but we can derive the existence by whether any nodes are returned,
3877  * thereby preventing a library version upgrade and keeping the code
3878  * the same.
3879  */
3880  xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
3881  if (xpathobj == NULL || xmlerrcxt->err_occurred)
3882  xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3883  "could not create XPath object");
3884 
3885  /*
3886  * Extract the results as requested.
3887  */
3888  if (res_nitems != NULL)
3889  *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate);
3890  else
3891  (void) xml_xpathobjtoxmlarray(xpathobj, astate);
3892  }
3893  PG_CATCH();
3894  {
3895  if (xpathobj)
3896  xmlXPathFreeObject(xpathobj);
3897  if (xpathcomp)
3898  xmlXPathFreeCompExpr(xpathcomp);
3899  if (xpathctx)
3900  xmlXPathFreeContext(xpathctx);
3901  if (doc)
3902  xmlFreeDoc(doc);
3903  if (ctxt)
3904  xmlFreeParserCtxt(ctxt);
3905 
3906  pg_xml_done(xmlerrcxt, true);
3907 
3908  PG_RE_THROW();
3909  }
3910  PG_END_TRY();
3911 
3912  xmlXPathFreeObject(xpathobj);
3913  xmlXPathFreeCompExpr(xpathcomp);
3914  xmlXPathFreeContext(xpathctx);
3915  xmlFreeDoc(doc);
3916  xmlFreeParserCtxt(ctxt);
3917 
3918  pg_xml_done(xmlerrcxt, false);
3919 }
3920 #endif /* USE_LIBXML */
3921 
3922 /*
3923  * Evaluate XPath expression and return array of XML values.
3924  *
3925  * As we have no support of XQuery sequences yet, this function seems
3926  * to be the most useful one (array of XML functions plays a role of
3927  * some kind of substitution for XQuery sequences).
3928  */
3929 Datum
3931 {
3932 #ifdef USE_LIBXML
3933  text *xpath_expr_text = PG_GETARG_TEXT_P(0);
3934  xmltype *data = PG_GETARG_XML_P(1);
3935  ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
3936  int res_nitems;
3937  ArrayBuildState *astate;
3938 
3939  xpath_internal(xpath_expr_text, data, namespaces,
3940  &res_nitems, &astate);
3941 
3942  if (res_nitems == 0)
3944  else
3946 #else
3947  NO_XML_SUPPORT();
3948  return 0;
3949 #endif
3950 }
3951 
3952 /*
3953  * Determines if the node specified by the supplied XPath exists
3954  * in a given XML document, returning a boolean.
3955  */
3956 Datum
3958 {
3959 #ifdef USE_LIBXML
3960  text *xpath_expr_text = PG_GETARG_TEXT_P(0);
3961  xmltype *data = PG_GETARG_XML_P(1);
3962  int res_nitems;
3963 
3964  xpath_internal(xpath_expr_text, data, NULL,
3965  &res_nitems, NULL);
3966 
3967  PG_RETURN_BOOL(res_nitems > 0);
3968 #else
3969  NO_XML_SUPPORT();
3970  return 0;
3971 #endif
3972 }
3973 
3974 /*
3975  * Determines if the node specified by the supplied XPath exists
3976  * in a given XML document, returning a boolean. Differs from
3977  * xmlexists as it supports namespaces and is not defined in SQL/XML.
3978  */
3979 Datum
3981 {
3982 #ifdef USE_LIBXML
3983  text *xpath_expr_text = PG_GETARG_TEXT_P(0);
3984  xmltype *data = PG_GETARG_XML_P(1);
3985  ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
3986  int res_nitems;
3987 
3988  xpath_internal(xpath_expr_text, data, namespaces,
3989  &res_nitems, NULL);
3990 
3991  PG_RETURN_BOOL(res_nitems > 0);
3992 #else
3993  NO_XML_SUPPORT();
3994  return 0;
3995 #endif
3996 }
3997 
3998 /*
3999  * Functions for checking well-formed-ness
4000  */
4001 
4002 #ifdef USE_LIBXML
4003 static bool
4004 wellformed_xml(text *data, XmlOptionType xmloption_arg)
4005 {
4006  bool result;
4007  volatile xmlDocPtr doc = NULL;
4008 
4009  /* We want to catch any exceptions and return false */
4010  PG_TRY();
4011  {
4012  doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
4013  result = true;
4014  }
4015  PG_CATCH();
4016  {
4017  FlushErrorState();
4018  result = false;
4019  }
4020  PG_END_TRY();
4021 
4022  if (doc)
4023  xmlFreeDoc(doc);
4024 
4025  return result;
4026 }
4027 #endif
4028 
4029 Datum
4031 {
4032 #ifdef USE_LIBXML
4033  text *data = PG_GETARG_TEXT_P(0);
4034 
4035  PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4036 #else
4037  NO_XML_SUPPORT();
4038  return 0;
4039 #endif /* not USE_LIBXML */
4040 }
4041 
4042 Datum
4044 {
4045 #ifdef USE_LIBXML
4046  text *data = PG_GETARG_TEXT_P(0);
4047 
4048  PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4049 #else
4050  NO_XML_SUPPORT();
4051  return 0;
4052 #endif /* not USE_LIBXML */
4053 }
4054 
4055 Datum
4057 {
4058 #ifdef USE_LIBXML
4059  text *data = PG_GETARG_TEXT_P(0);
4060 
4061  PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4062 #else
4063  NO_XML_SUPPORT();
4064  return 0;
4065 #endif /* not USE_LIBXML */
4066 }