PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
ascii.c
Go to the documentation of this file.
1 /*-----------------------------------------------------------------------
2  * ascii.c
3  * The PostgreSQL routine for string to ascii conversion.
4  *
5  * Portions Copyright (c) 1999-2016, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  * src/backend/utils/adt/ascii.c
9  *
10  *-----------------------------------------------------------------------
11  */
12 #include "postgres.h"
13 
14 #include "mb/pg_wchar.h"
15 #include "utils/ascii.h"
16 
17 static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
18  unsigned char *dest, int enc);
19 static text *encode_to_ascii(text *data, int enc);
20 
21 
22 /* ----------
23  * to_ascii
24  * ----------
25  */
26 static void
27 pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
28 {
29  unsigned char *x;
30  const unsigned char *ascii;
31  int range;
32 
33  /*
34  * relevant start for an encoding
35  */
36 #define RANGE_128 128
37 #define RANGE_160 160
38 
39  if (enc == PG_LATIN1)
40  {
41  /*
42  * ISO-8859-1 <range: 160 -- 255>
43  */
44  ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
45  range = RANGE_160;
46  }
47  else if (enc == PG_LATIN2)
48  {
49  /*
50  * ISO-8859-2 <range: 160 -- 255>
51  */
52  ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
53  range = RANGE_160;
54  }
55  else if (enc == PG_LATIN9)
56  {
57  /*
58  * ISO-8859-15 <range: 160 -- 255>
59  */
60  ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
61  range = RANGE_160;
62  }
63  else if (enc == PG_WIN1250)
64  {
65  /*
66  * Window CP1250 <range: 128 -- 255>
67  */
68  ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
69  range = RANGE_128;
70  }
71  else
72  {
73  ereport(ERROR,
74  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
75  errmsg("encoding conversion from %s to ASCII not supported",
76  pg_encoding_to_char(enc))));
77  return; /* keep compiler quiet */
78  }
79 
80  /*
81  * Encode
82  */
83  for (x = src; x < src_end; x++)
84  {
85  if (*x < 128)
86  *dest++ = *x;
87  else if (*x < range)
88  *dest++ = ' '; /* bogus 128 to 'range' */
89  else
90  *dest++ = ascii[*x - range];
91  }
92 }
93 
94 /* ----------
95  * encode text
96  *
97  * The text datum is overwritten in-place, therefore this coding method
98  * cannot support conversions that change the string length!
99  * ----------
100  */
101 static text *
103 {
104  pg_to_ascii((unsigned char *) VARDATA(data), /* src */
105  (unsigned char *) (data) + VARSIZE(data), /* src end */
106  (unsigned char *) VARDATA(data), /* dest */
107  enc); /* encoding */
108 
109  return data;
110 }
111 
112 /* ----------
113  * convert to ASCII - enc is set as 'name' arg.
114  * ----------
115  */
116 Datum
118 {
119  text *data = PG_GETARG_TEXT_P_COPY(0);
120  char *encname = NameStr(*PG_GETARG_NAME(1));
121  int enc = pg_char_to_encoding(encname);
122 
123  if (enc < 0)
124  ereport(ERROR,
125  (errcode(ERRCODE_UNDEFINED_OBJECT),
126  errmsg("%s is not a valid encoding name", encname)));
127 
128  PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
129 }
130 
131 /* ----------
132  * convert to ASCII - enc is set as int4
133  * ----------
134  */
135 Datum
137 {
138  text *data = PG_GETARG_TEXT_P_COPY(0);
139  int enc = PG_GETARG_INT32(1);
140 
141  if (!PG_VALID_ENCODING(enc))
142  ereport(ERROR,
143  (errcode(ERRCODE_UNDEFINED_OBJECT),
144  errmsg("%d is not a valid encoding code", enc)));
145 
146  PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
147 }
148 
149 /* ----------
150  * convert to ASCII - current enc is DatabaseEncoding
151  * ----------
152  */
153 Datum
155 {
156  text *data = PG_GETARG_TEXT_P_COPY(0);
157  int enc = GetDatabaseEncoding();
158 
159  PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
160 }
161 
/* ----------
 * Copy a string in an arbitrary backend-safe encoding, converting it to a
 * valid, printable ASCII string: bytes 32..126 plus newline, carriage
 * return and tab are kept, and every other byte (non-ASCII bytes, control
 * characters and DEL) is replaced with '?'.  Otherwise the behavior is
 * identical to strlcpy(), except that we don't bother with a return value.
 *
 * dest		output buffer; always NUL-terminated on return unless
 *			destsiz is 0, in which case it is not touched at all
 * src		NUL-terminated input string
 * destsiz	total size of dest, including room for the trailing NUL;
 *			at most destsiz - 1 bytes of src are copied
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	if (destsiz == 0)			/* corner case: no room for trailing nul */
		return;

	/* pre-decrement reserves the final slot for the terminator */
	while (--destsiz > 0)
	{
		/* use unsigned char here to avoid compiler warning */
		unsigned char ch = *src++;

		if (ch == '\0')
			break;
		/* Keep printable ASCII characters; 127 (DEL) is a control code */
		if (32 <= ch && ch <= 126)
			*dest = ch;
		/* White-space is also OK */
		else if (ch == '\n' || ch == '\r' || ch == '\t')
			*dest = ch;
		/* Everything else is replaced with '?' */
		else
			*dest = '?';
		dest++;
	}

	*dest = '\0';
}
#define PG_GETARG_INT32(n)
Definition: fmgr.h:225
static void pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
Definition: ascii.c:27
static struct cvec * range(struct vars *v, celt a, celt b, int cases)
Definition: regc_locale.c:403
int pg_char_to_encoding(const char *name)
Definition: encnames.c:475
#define RANGE_128
#define VARDATA(PTR)
Definition: postgres.h:305
#define VARSIZE(PTR)
Definition: postgres.h:306
Datum to_ascii_encname(PG_FUNCTION_ARGS)
Definition: ascii.c:117
#define RANGE_160
Datum ascii(PG_FUNCTION_ARGS)
int errcode(int sqlerrcode)
Definition: elog.c:575
void ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
Definition: ascii.c:172
Datum to_ascii_enc(PG_FUNCTION_ARGS)
Definition: ascii.c:136
#define PG_GETARG_TEXT_P_COPY(n)
Definition: fmgr.h:278
Datum to_ascii_default(PG_FUNCTION_ARGS)
Definition: ascii.c:154
#define ERROR
Definition: elog.h:43
struct pg_encoding enc
Definition: encode.c:522
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:299
#define ereport(elevel, rest)
Definition: elog.h:122
uintptr_t Datum
Definition: postgres.h:374
int GetDatabaseEncoding(void)
Definition: mbutils.c:1015
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:314
const char * pg_encoding_to_char(int encoding)
Definition: encnames.c:531
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define NameStr(name)
Definition: c.h:494
Definition: c.h:434
#define PG_FUNCTION_ARGS
Definition: fmgr.h:150
static text * encode_to_ascii(text *data, int enc)
Definition: ascii.c:102
#define PG_GETARG_NAME(n)
Definition: fmgr.h:234