PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
trgm_gist.c
Go to the documentation of this file.
1 /*
2  * contrib/pg_trgm/trgm_gist.c
3  */
4 #include "postgres.h"
5 
6 #include "trgm.h"
7 
8 #include "access/stratnum.h"
9 #include "fmgr.h"
10 
11 
12 typedef struct
13 {
14  /* most recent inputs to gtrgm_consistent */
17  /* extracted trigrams for query */
19  /* if a regex operator, the extracted graph */
21 
22  /*
23  * The "query" and "trigrams" are stored in the same palloc block as this
24  * cache struct, at MAXALIGN'ed offsets. The graph however isn't.
25  */
27 
28 #define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
29 
30 
41 
42 /* Number of one-bits in an unsigned byte */
43 static const uint8 number_of_ones[256] = {
44  0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
45  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
46  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
47  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
48  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
49  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
50  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
51  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
52  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
53  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
54  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
55  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
56  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
57  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
58  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
59  4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
60 };
61 
62 
63 Datum
65 {
66  elog(ERROR, "not implemented");
67  PG_RETURN_DATUM(0);
68 }
69 
70 Datum
72 {
73  elog(ERROR, "not implemented");
74  PG_RETURN_DATUM(0);
75 }
76 
77 static void
79 {
80  int32 k,
81  len = ARRNELEM(a);
82  trgm *ptr = GETARR(a);
83  int32 tmp = 0;
84 
85  MemSet((void *) sign, 0, sizeof(BITVEC));
86  SETBIT(sign, SIGLENBIT); /* set last unused bit */
87  for (k = 0; k < len; k++)
88  {
89  CPTRGM(((char *) &tmp), ptr + k);
90  HASH(sign, tmp);
91  }
92 }
93 
94 Datum
96 {
97  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
98  GISTENTRY *retval = entry;
99 
100  if (entry->leafkey)
101  { /* trgm */
102  TRGM *res;
103  text *val = DatumGetTextP(entry->key);
104 
105  res = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
106  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
107  gistentryinit(*retval, PointerGetDatum(res),
108  entry->rel, entry->page,
109  entry->offset, FALSE);
110  }
111  else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
112  !ISALLTRUE(DatumGetPointer(entry->key)))
113  {
114  int32 i,
115  len;
116  TRGM *res;
118 
119  LOOPBYTE
120  {
121  if ((sign[i] & 0xff) != 0xff)
122  PG_RETURN_POINTER(retval);
123  }
124 
125  len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
126  res = (TRGM *) palloc(len);
127  SET_VARSIZE(res, len);
128  res->flag = SIGNKEY | ALLISTRUE;
129 
130  retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
131  gistentryinit(*retval, PointerGetDatum(res),
132  entry->rel, entry->page,
133  entry->offset, FALSE);
134  }
135  PG_RETURN_POINTER(retval);
136 }
137 
138 Datum
140 {
141  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
142  GISTENTRY *retval;
143  text *key;
144 
145  key = DatumGetTextP(entry->key);
146 
147  if (key != (text *) DatumGetPointer(entry->key))
148  {
149  /* need to pass back the decompressed item */
150  retval = palloc(sizeof(GISTENTRY));
151  gistentryinit(*retval, PointerGetDatum(key),
152  entry->rel, entry->page, entry->offset, entry->leafkey);
153  PG_RETURN_POINTER(retval);
154  }
155  else
156  {
157  /* we can return the entry as-is */
158  PG_RETURN_POINTER(entry);
159  }
160 }
161 
162 static int32
164 {
165  int32 count = 0;
166  int32 k,
167  len = ARRNELEM(qtrg);
168  trgm *ptr = GETARR(qtrg);
169  int32 tmp = 0;
170 
171  for (k = 0; k < len; k++)
172  {
173  CPTRGM(((char *) &tmp), ptr + k);
174  count += GETBIT(sign, HASHVAL(tmp));
175  }
176 
177  return count;
178 }
179 
180 Datum
182 {
183  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
184  text *query = PG_GETARG_TEXT_P(1);
186 
187  /* Oid subtype = PG_GETARG_OID(3); */
188  bool *recheck = (bool *) PG_GETARG_POINTER(4);
189  TRGM *key = (TRGM *) DatumGetPointer(entry->key);
190  TRGM *qtrg;
191  bool res;
192  Size querysize = VARSIZE(query);
193  gtrgm_consistent_cache *cache;
194  double nlimit;
195 
196  /*
197  * We keep the extracted trigrams in cache, because trigram extraction is
198  * relatively CPU-expensive. When trying to reuse a cached value, check
199  * strategy number not just query itself, because trigram extraction
200  * depends on strategy.
201  *
202  * The cached structure is a single palloc chunk containing the
203  * gtrgm_consistent_cache header, then the input query (starting at a
204  * MAXALIGN boundary), then the TRGM value (also starting at a MAXALIGN
205  * boundary). However we don't try to include the regex graph (if any) in
206  * that struct. (XXX currently, this approach can leak regex graphs
207  * across index rescans. Not clear if that's worth fixing.)
208  */
209  cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra;
210  if (cache == NULL ||
211  cache->strategy != strategy ||
212  VARSIZE(cache->query) != querysize ||
213  memcmp((char *) cache->query, (char *) query, querysize) != 0)
214  {
215  gtrgm_consistent_cache *newcache;
216  TrgmPackedGraph *graph = NULL;
217  Size qtrgsize;
218 
219  switch (strategy)
220  {
223  qtrg = generate_trgm(VARDATA(query),
224  querysize - VARHDRSZ);
225  break;
226  case ILikeStrategyNumber:
227 #ifndef IGNORECASE
228  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
229 #endif
230  /* FALL THRU */
231  case LikeStrategyNumber:
232  qtrg = generate_wildcard_trgm(VARDATA(query),
233  querysize - VARHDRSZ);
234  break;
236 #ifndef IGNORECASE
237  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
238 #endif
239  /* FALL THRU */
241  qtrg = createTrgmNFA(query, PG_GET_COLLATION(),
242  &graph, fcinfo->flinfo->fn_mcxt);
243  /* just in case an empty array is returned ... */
244  if (qtrg && ARRNELEM(qtrg) <= 0)
245  {
246  pfree(qtrg);
247  qtrg = NULL;
248  }
249  break;
250  default:
251  elog(ERROR, "unrecognized strategy number: %d", strategy);
252  qtrg = NULL; /* keep compiler quiet */
253  break;
254  }
255 
256  qtrgsize = qtrg ? VARSIZE(qtrg) : 0;
257 
258  newcache = (gtrgm_consistent_cache *)
259  MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
261  MAXALIGN(querysize) +
262  qtrgsize);
263 
264  newcache->strategy = strategy;
265  newcache->query = (text *)
266  ((char *) newcache + MAXALIGN(sizeof(gtrgm_consistent_cache)));
267  memcpy((char *) newcache->query, (char *) query, querysize);
268  if (qtrg)
269  {
270  newcache->trigrams = (TRGM *)
271  ((char *) newcache->query + MAXALIGN(querysize));
272  memcpy((char *) newcache->trigrams, (char *) qtrg, qtrgsize);
273  /* release qtrg in case it was made in fn_mcxt */
274  pfree(qtrg);
275  }
276  else
277  newcache->trigrams = NULL;
278  newcache->graph = graph;
279 
280  if (cache)
281  pfree(cache);
282  fcinfo->flinfo->fn_extra = (void *) newcache;
283  cache = newcache;
284  }
285 
286  qtrg = cache->trigrams;
287 
288  switch (strategy)
289  {
292  /* Similarity search is exact. Word similarity search is inexact */
293  *recheck = (strategy == WordSimilarityStrategyNumber);
294  nlimit = (strategy == SimilarityStrategyNumber) ?
296 
297  if (GIST_LEAF(entry))
298  { /* all leafs contains orig trgm */
299  /*
300  * Prevent gcc optimizing the tmpsml variable using volatile
301  * keyword. Otherwise comparison of nlimit and tmpsml may give
302  * wrong results.
303  */
304  float4 volatile tmpsml = cnt_sml(qtrg, key, *recheck);
305 
306  /* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
307  res = (*(int *) &tmpsml == *(int *) &nlimit || tmpsml > nlimit);
308  }
309  else if (ISALLTRUE(key))
310  { /* non-leaf contains signature */
311  res = true;
312  }
313  else
314  { /* non-leaf contains signature */
315  int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
316  int32 len = ARRNELEM(qtrg);
317 
318  if (len == 0)
319  res = false;
320  else
321  res = (((((float8) count) / ((float8) len))) >= nlimit);
322  }
323  break;
324  case ILikeStrategyNumber:
325 #ifndef IGNORECASE
326  elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
327 #endif
328  /* FALL THRU */
329  case LikeStrategyNumber:
330  /* Wildcard search is inexact */
331  *recheck = true;
332 
333  /*
334  * Check if all the extracted trigrams can be present in child
335  * nodes.
336  */
337  if (GIST_LEAF(entry))
338  { /* all leafs contains orig trgm */
339  res = trgm_contained_by(qtrg, key);
340  }
341  else if (ISALLTRUE(key))
342  { /* non-leaf contains signature */
343  res = true;
344  }
345  else
346  { /* non-leaf contains signature */
347  int32 k,
348  tmp = 0,
349  len = ARRNELEM(qtrg);
350  trgm *ptr = GETARR(qtrg);
351  BITVECP sign = GETSIGN(key);
352 
353  res = true;
354  for (k = 0; k < len; k++)
355  {
356  CPTRGM(((char *) &tmp), ptr + k);
357  if (!GETBIT(sign, HASHVAL(tmp)))
358  {
359  res = false;
360  break;
361  }
362  }
363  }
364  break;
366 #ifndef IGNORECASE
367  elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
368 #endif
369  /* FALL THRU */
371  /* Regexp search is inexact */
372  *recheck = true;
373 
374  /* Check regex match as much as we can with available info */
375  if (qtrg)
376  {
377  if (GIST_LEAF(entry))
378  { /* all leafs contains orig trgm */
379  bool *check;
380 
381  check = trgm_presence_map(qtrg, key);
382  res = trigramsMatchGraph(cache->graph, check);
383  pfree(check);
384  }
385  else if (ISALLTRUE(key))
386  { /* non-leaf contains signature */
387  res = true;
388  }
389  else
390  { /* non-leaf contains signature */
391  int32 k,
392  tmp = 0,
393  len = ARRNELEM(qtrg);
394  trgm *ptr = GETARR(qtrg);
395  BITVECP sign = GETSIGN(key);
396  bool *check;
397 
398  /*
399  * GETBIT() tests may give false positives, due to limited
400  * size of the sign array. But since trigramsMatchGraph()
401  * implements a monotone boolean function, false positives
402  * in the check array can't lead to false negative answer.
403  * So we can apply trigramsMatchGraph despite uncertainty,
404  * and that usefully improves the quality of the search.
405  */
406  check = (bool *) palloc(len * sizeof(bool));
407  for (k = 0; k < len; k++)
408  {
409  CPTRGM(((char *) &tmp), ptr + k);
410  check[k] = GETBIT(sign, HASHVAL(tmp));
411  }
412  res = trigramsMatchGraph(cache->graph, check);
413  pfree(check);
414  }
415  }
416  else
417  {
418  /* trigram-free query must be rechecked everywhere */
419  res = true;
420  }
421  break;
422  default:
423  elog(ERROR, "unrecognized strategy number: %d", strategy);
424  res = false; /* keep compiler quiet */
425  break;
426  }
427 
428  PG_RETURN_BOOL(res);
429 }
430 
431 Datum
433 {
434  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
435  text *query = PG_GETARG_TEXT_P(1);
437 
438  /* Oid subtype = PG_GETARG_OID(3); */
439  bool *recheck = (bool *) PG_GETARG_POINTER(4);
440  TRGM *key = (TRGM *) DatumGetPointer(entry->key);
441  TRGM *qtrg;
442  float8 res;
443  Size querysize = VARSIZE(query);
444  char *cache = (char *) fcinfo->flinfo->fn_extra;
445 
446  /*
447  * Cache the generated trigrams across multiple calls with the same query.
448  */
449  if (cache == NULL ||
450  VARSIZE(cache) != querysize ||
451  memcmp(cache, query, querysize) != 0)
452  {
453  char *newcache;
454 
455  qtrg = generate_trgm(VARDATA(query), querysize - VARHDRSZ);
456 
457  newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
458  MAXALIGN(querysize) +
459  VARSIZE(qtrg));
460 
461  memcpy(newcache, query, querysize);
462  memcpy(newcache + MAXALIGN(querysize), qtrg, VARSIZE(qtrg));
463 
464  if (cache)
465  pfree(cache);
466  fcinfo->flinfo->fn_extra = newcache;
467  cache = newcache;
468  }
469 
470  qtrg = (TRGM *) (cache + MAXALIGN(querysize));
471 
472  switch (strategy)
473  {
476  *recheck = strategy == WordDistanceStrategyNumber;
477  if (GIST_LEAF(entry))
478  { /* all leafs contains orig trgm */
479  /*
480  * Prevent gcc optimizing the sml variable using volatile
481  * keyword. Otherwise res can differ from the
482  * word_similarity_dist_op() function.
483  */
484  float4 volatile sml = cnt_sml(qtrg, key, *recheck);
485  res = 1.0 - sml;
486  }
487  else if (ISALLTRUE(key))
488  { /* all leafs contains orig trgm */
489  res = 0.0;
490  }
491  else
492  { /* non-leaf contains signature */
493  int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
494  int32 len = ARRNELEM(qtrg);
495 
496  res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len);
497  }
498  break;
499  default:
500  elog(ERROR, "unrecognized strategy number: %d", strategy);
501  res = 0; /* keep compiler quiet */
502  break;
503  }
504 
505  PG_RETURN_FLOAT8(res);
506 }
507 
508 static int32
509 unionkey(BITVECP sbase, TRGM *add)
510 {
511  int32 i;
512 
513  if (ISSIGNKEY(add))
514  {
515  BITVECP sadd = GETSIGN(add);
516 
517  if (ISALLTRUE(add))
518  return 1;
519 
520  LOOPBYTE
521  sbase[i] |= sadd[i];
522  }
523  else
524  {
525  trgm *ptr = GETARR(add);
526  int32 tmp = 0;
527 
528  for (i = 0; i < ARRNELEM(add); i++)
529  {
530  CPTRGM(((char *) &tmp), ptr + i);
531  HASH(sbase, tmp);
532  }
533  }
534  return 0;
535 }
536 
537 
538 Datum
540 {
542  int32 len = entryvec->n;
543  int *size = (int *) PG_GETARG_POINTER(1);
544  BITVEC base;
545  int32 i;
546  int32 flag = 0;
547  TRGM *result;
548 
549  MemSet((void *) base, 0, sizeof(BITVEC));
550  for (i = 0; i < len; i++)
551  {
552  if (unionkey(base, GETENTRY(entryvec, i)))
553  {
554  flag = ALLISTRUE;
555  break;
556  }
557  }
558 
559  flag |= SIGNKEY;
560  len = CALCGTSIZE(flag, 0);
561  result = (TRGM *) palloc(len);
562  SET_VARSIZE(result, len);
563  result->flag = flag;
564  if (!ISALLTRUE(result))
565  memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
566  *size = len;
567 
568  PG_RETURN_POINTER(result);
569 }
570 
571 Datum
573 {
574  TRGM *a = (TRGM *) PG_GETARG_POINTER(0);
575  TRGM *b = (TRGM *) PG_GETARG_POINTER(1);
576  bool *result = (bool *) PG_GETARG_POINTER(2);
577 
578  if (ISSIGNKEY(a))
579  { /* then b also ISSIGNKEY */
580  if (ISALLTRUE(a) && ISALLTRUE(b))
581  *result = true;
582  else if (ISALLTRUE(a))
583  *result = false;
584  else if (ISALLTRUE(b))
585  *result = false;
586  else
587  {
588  int32 i;
589  BITVECP sa = GETSIGN(a),
590  sb = GETSIGN(b);
591 
592  *result = true;
593  LOOPBYTE
594  {
595  if (sa[i] != sb[i])
596  {
597  *result = false;
598  break;
599  }
600  }
601  }
602  }
603  else
604  { /* a and b ISARRKEY */
605  int32 lena = ARRNELEM(a),
606  lenb = ARRNELEM(b);
607 
608  if (lena != lenb)
609  *result = false;
610  else
611  {
612  trgm *ptra = GETARR(a),
613  *ptrb = GETARR(b);
614  int32 i;
615 
616  *result = true;
617  for (i = 0; i < lena; i++)
618  if (CMPTRGM(ptra + i, ptrb + i))
619  {
620  *result = false;
621  break;
622  }
623  }
624  }
625 
626  PG_RETURN_POINTER(result);
627 }
628 
629 static int32
631 {
632  int32 size = 0,
633  i;
634 
635  LOOPBYTE
636  size += number_of_ones[(unsigned char) sign[i]];
637  return size;
638 }
639 
640 static int
642 {
643  int i,
644  diff,
645  dist = 0;
646 
647  LOOPBYTE
648  {
649  diff = (unsigned char) (a[i] ^ b[i]);
650  dist += number_of_ones[diff];
651  }
652  return dist;
653 }
654 
655 static int
657 {
658  if (ISALLTRUE(a))
659  {
660  if (ISALLTRUE(b))
661  return 0;
662  else
663  return SIGLENBIT - sizebitvec(GETSIGN(b));
664  }
665  else if (ISALLTRUE(b))
666  return SIGLENBIT - sizebitvec(GETSIGN(a));
667 
668  return hemdistsign(GETSIGN(a), GETSIGN(b));
669 }
670 
671 Datum
673 {
674  GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
675  GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
676  float *penalty = (float *) PG_GETARG_POINTER(2);
677  TRGM *origval = (TRGM *) DatumGetPointer(origentry->key);
678  TRGM *newval = (TRGM *) DatumGetPointer(newentry->key);
679  BITVECP orig = GETSIGN(origval);
680 
681  *penalty = 0.0;
682 
683  if (ISARRKEY(newval))
684  {
685  char *cache = (char *) fcinfo->flinfo->fn_extra;
686  TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(sizeof(BITVEC)));
687  Size newvalsize = VARSIZE(newval);
688  BITVECP sign;
689 
690  /*
691  * Cache the sign data across multiple calls with the same newval.
692  */
693  if (cache == NULL ||
694  VARSIZE(cachedVal) != newvalsize ||
695  memcmp(cachedVal, newval, newvalsize) != 0)
696  {
697  char *newcache;
698 
699  newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
700  MAXALIGN(sizeof(BITVEC)) +
701  newvalsize);
702 
703  makesign((BITVECP) newcache, newval);
704 
705  cachedVal = (TRGM *) (newcache + MAXALIGN(sizeof(BITVEC)));
706  memcpy(cachedVal, newval, newvalsize);
707 
708  if (cache)
709  pfree(cache);
710  fcinfo->flinfo->fn_extra = newcache;
711  cache = newcache;
712  }
713 
714  sign = (BITVECP) cache;
715 
716  if (ISALLTRUE(origval))
717  *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
718  else
719  *penalty = hemdistsign(sign, orig);
720  }
721  else
722  *penalty = hemdist(origval, newval);
723  PG_RETURN_POINTER(penalty);
724 }
725 
726 typedef struct
727 {
728  bool allistrue;
730 } CACHESIGN;
731 
732 static void
734 {
735  item->allistrue = false;
736  if (ISARRKEY(key))
737  makesign(item->sign, key);
738  else if (ISALLTRUE(key))
739  item->allistrue = true;
740  else
741  memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
742 }
743 
744 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
745 typedef struct
746 {
747  OffsetNumber pos;
748  int32 cost;
749 } SPLITCOST;
750 
751 static int
752 comparecost(const void *a, const void *b)
753 {
754  if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
755  return 0;
756  else
757  return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
758 }
759 
760 
761 static int
763 {
764  if (a->allistrue)
765  {
766  if (b->allistrue)
767  return 0;
768  else
769  return SIGLENBIT - sizebitvec(b->sign);
770  }
771  else if (b->allistrue)
772  return SIGLENBIT - sizebitvec(a->sign);
773 
774  return hemdistsign(a->sign, b->sign);
775 }
776 
777 Datum
779 {
781  OffsetNumber maxoff = entryvec->n - 2;
783  OffsetNumber k,
784  j;
785  TRGM *datum_l,
786  *datum_r;
787  BITVECP union_l,
788  union_r;
789  int32 size_alpha,
790  size_beta;
791  int32 size_waste,
792  waste = -1;
793  int32 nbytes;
794  OffsetNumber seed_1 = 0,
795  seed_2 = 0;
796  OffsetNumber *left,
797  *right;
798  BITVECP ptr;
799  int i;
800  CACHESIGN *cache;
801  SPLITCOST *costvector;
802 
803  /* cache the sign data for each existing item */
804  cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
805  for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k))
806  fillcache(&cache[k], GETENTRY(entryvec, k));
807 
808  /* now find the two furthest-apart items */
809  for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
810  {
811  for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
812  {
813  size_waste = hemdistcache(&(cache[j]), &(cache[k]));
814  if (size_waste > waste)
815  {
816  waste = size_waste;
817  seed_1 = k;
818  seed_2 = j;
819  }
820  }
821  }
822 
823  /* just in case we didn't make a selection ... */
824  if (seed_1 == 0 || seed_2 == 0)
825  {
826  seed_1 = 1;
827  seed_2 = 2;
828  }
829 
830  /* initialize the result vectors */
831  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
832  v->spl_left = left = (OffsetNumber *) palloc(nbytes);
833  v->spl_right = right = (OffsetNumber *) palloc(nbytes);
834  v->spl_nleft = 0;
835  v->spl_nright = 0;
836 
837  /* form initial .. */
838  if (cache[seed_1].allistrue)
839  {
840  datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
841  SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
842  datum_l->flag = SIGNKEY | ALLISTRUE;
843  }
844  else
845  {
846  datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
847  SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
848  datum_l->flag = SIGNKEY;
849  memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
850  }
851  if (cache[seed_2].allistrue)
852  {
853  datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
854  SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
855  datum_r->flag = SIGNKEY | ALLISTRUE;
856  }
857  else
858  {
859  datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
860  SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
861  datum_r->flag = SIGNKEY;
862  memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
863  }
864 
865  union_l = GETSIGN(datum_l);
866  union_r = GETSIGN(datum_r);
867  maxoff = OffsetNumberNext(maxoff);
868  fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
869  /* sort before ... */
870  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
871  for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
872  {
873  costvector[j - 1].pos = j;
874  size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
875  size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
876  costvector[j - 1].cost = abs(size_alpha - size_beta);
877  }
878  qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
879 
880  for (k = 0; k < maxoff; k++)
881  {
882  j = costvector[k].pos;
883  if (j == seed_1)
884  {
885  *left++ = j;
886  v->spl_nleft++;
887  continue;
888  }
889  else if (j == seed_2)
890  {
891  *right++ = j;
892  v->spl_nright++;
893  continue;
894  }
895 
896  if (ISALLTRUE(datum_l) || cache[j].allistrue)
897  {
898  if (ISALLTRUE(datum_l) && cache[j].allistrue)
899  size_alpha = 0;
900  else
901  size_alpha = SIGLENBIT - sizebitvec(
902  (cache[j].allistrue) ? GETSIGN(datum_l) : GETSIGN(cache[j].sign)
903  );
904  }
905  else
906  size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
907 
908  if (ISALLTRUE(datum_r) || cache[j].allistrue)
909  {
910  if (ISALLTRUE(datum_r) && cache[j].allistrue)
911  size_beta = 0;
912  else
913  size_beta = SIGLENBIT - sizebitvec(
914  (cache[j].allistrue) ? GETSIGN(datum_r) : GETSIGN(cache[j].sign)
915  );
916  }
917  else
918  size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
919 
920  if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
921  {
922  if (ISALLTRUE(datum_l) || cache[j].allistrue)
923  {
924  if (!ISALLTRUE(datum_l))
925  MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
926  }
927  else
928  {
929  ptr = cache[j].sign;
930  LOOPBYTE
931  union_l[i] |= ptr[i];
932  }
933  *left++ = j;
934  v->spl_nleft++;
935  }
936  else
937  {
938  if (ISALLTRUE(datum_r) || cache[j].allistrue)
939  {
940  if (!ISALLTRUE(datum_r))
941  MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
942  }
943  else
944  {
945  ptr = cache[j].sign;
946  LOOPBYTE
947  union_r[i] |= ptr[i];
948  }
949  *right++ = j;
950  v->spl_nright++;
951  }
952  }
953 
954  *right = *left = FirstOffsetNumber;
955  v->spl_ldatum = PointerGetDatum(datum_l);
956  v->spl_rdatum = PointerGetDatum(datum_r);
957 
959 }
#define GIST_LEAF(entry)
Definition: gist.h:133
Relation rel
Definition: gist.h:124
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:305
OffsetNumber pos
Definition: hstore_gist.c:323
#define SETBIT(x, i)
Definition: blutils.c:33
bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check)
Definition: trgm_regexp.c:636
#define CMPTRGM(a, b)
Definition: trgm.h:42
#define VARDATA(PTR)
Definition: postgres.h:305
static int hemdist(TRGM *a, TRGM *b)
Definition: trgm_gist.c:656
#define RegExpStrategyNumber
Definition: trgm.h:33
static int32 cnt_sml_sign_common(TRGM *qtrg, BITVECP sign)
Definition: trgm_gist.c:163
TRGM * generate_wildcard_trgm(const char *str, int slen)
Definition: trgm_op.c:791
#define SimilarityStrategyNumber
Definition: trgm.h:29
#define VARSIZE(PTR)
Definition: postgres.h:306
#define ISSIGNKEY(x)
Definition: trgm.h:99
#define GETSIGN(x)
Definition: hstore_gist.c:54
#define PointerGetDatum(X)
Definition: postgres.h:564
#define DistanceStrategyNumber
Definition: trgm.h:30
#define VARHDRSZ
Definition: c.h:440
PG_FUNCTION_INFO_V1(gtrgm_in)
TRGM * createTrgmNFA(text *text_re, Oid collation, TrgmPackedGraph **graph, MemoryContext rcontext)
Definition: trgm_regexp.c:516
#define RegExpICaseStrategyNumber
Definition: trgm.h:34
#define SIGLENBIT
Definition: hstore_gist.c:17
#define WordDistanceStrategyNumber
Definition: trgm.h:36
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:310
double similarity_threshold
Definition: trgm_op.c:19
static int hemdistcache(CACHESIGN *a, CACHESIGN *b)
Definition: trgm_gist.c:762
OffsetNumber * spl_left
Definition: gist.h:105
Datum spl_rdatum
Definition: gist.h:112
unsigned char uint8
Definition: c.h:263
Datum gtrgm_union(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:539
int32 n
Definition: gist.h:160
static int comparecost(const void *a, const void *b)
Definition: trgm_gist.c:752
uint16 StrategyNumber
Definition: stratnum.h:22
float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
Definition: trgm_op.c:923
#define ALLISTRUE
Definition: hstore_gist.c:47
#define MemSet(start, val, len)
Definition: c.h:849
static void fillcache(CACHESIGN *item, TRGM *key)
Definition: trgm_gist.c:733
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:232
#define HASH(sign, val)
Definition: hstore_gist.c:38
double word_similarity_threshold
Definition: trgm_op.c:20
#define ARRNELEM(x)
Definition: trgm.h:105
bool * trgm_presence_map(TRGM *query, TRGM *key)
Definition: trgm_op.c:1010
int spl_nleft
Definition: gist.h:106
Datum gtrgm_same(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:572
static const uint8 number_of_ones[256]
Definition: trgm_gist.c:43
#define PG_GET_COLLATION()
Definition: fmgr.h:155
signed int int32
Definition: c.h:253
int32 cost
Definition: hstore_gist.c:324
#define WISH_F(a, b, c)
Definition: trgm_gist.c:744
uint16 OffsetNumber
Definition: off.h:24
Page page
Definition: gist.h:125
#define ISARRKEY(x)
Definition: trgm.h:98
static int32 unionkey(BITVECP sbase, TRGM *add)
Definition: trgm_gist.c:509
#define GETARR(x)
Definition: trgm.h:104
Datum gtrgm_distance(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:432
void pfree(void *pointer)
Definition: mcxt.c:995
#define GETBIT(x, i)
Definition: blutils.c:34
#define ERROR
Definition: elog.h:43
double float8
Definition: c.h:377
TrgmPackedGraph * graph
Definition: trgm_gist.c:20
Definition: trgm.h:63
#define FALSE
Definition: c.h:218
int spl_nright
Definition: gist.h:111
Datum gtrgm_consistent(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:181
static void makesign(BITVECP sign, TRGM *a)
Definition: trgm_gist.c:78
char BITVEC[SIGLEN]
Definition: hstore_gist.c:19
BITVEC sign
Definition: trgm_gist.c:729
char sign
Definition: informix.c:693
StrategyNumber strategy
Definition: trgm_gist.c:15
Datum key
Definition: gist.h:123
#define LikeStrategyNumber
Definition: trgm.h:31
#define FirstOffsetNumber
Definition: off.h:27
char * flag(int b)
Definition: test-ctype.c:33
#define HASHVAL(val)
Definition: hstore_gist.c:37
bool trgm_contained_by(TRGM *trg1, TRGM *trg2)
Definition: trgm_op.c:971
bool leafkey
Definition: gist.h:127
Datum gtrgm_out(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:71
float float4
Definition: c.h:376
#define SIGNKEY
Definition: trgm.h:95
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:303
uintptr_t Datum
Definition: postgres.h:374
#define GETENTRY(vec, pos)
Definition: trgm_gist.c:28
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:297
uint8 flag
Definition: trgm.h:66
Datum gtrgm_penalty(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:672
Datum gtrgm_picksplit(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:778
#define ISALLTRUE(x)
Definition: hstore_gist.c:49
#define WordSimilarityStrategyNumber
Definition: trgm.h:35
char trgm[3]
Definition: trgm.h:38
static int hemdistsign(BITVECP a, BITVECP b)
Definition: trgm_gist.c:641
#define CALCGTSIZE(flag)
Definition: hstore_gist.c:52
Datum spl_ldatum
Definition: gist.h:107
#define NULL
Definition: c.h:226
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:169
Datum gtrgm_decompress(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:139
TRGM * generate_trgm(char *str, int slen)
Definition: trgm_op.c:321
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
size_t Size
Definition: c.h:352
#define newval
OffsetNumber * spl_right
Definition: gist.h:110
#define MAXALIGN(LEN)
Definition: c.h:580
#define DatumGetTextP(X)
Definition: fmgr.h:248
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:228
#define DatumGetPointer(X)
Definition: postgres.h:557
void * palloc(Size size)
Definition: mcxt.c:894
#define PG_GETARG_TEXT_P(n)
Definition: fmgr.h:269
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:752
#define LOOPBYTE
Definition: hstore_gist.c:25
int i
Definition: c.h:434
#define PG_FUNCTION_ARGS
Definition: fmgr.h:150
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:330
char * BITVECP
Definition: hstore_gist.c:20
#define ILikeStrategyNumber
Definition: trgm.h:32
bool allistrue
Definition: trgm_gist.c:728
#define elog
Definition: elog.h:218
#define qsort(a, b, c, d)
Definition: port.h:438
Datum gtrgm_in(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:64
static int32 sizebitvec(BITVECP sign)
Definition: trgm_gist.c:630
Datum gtrgm_compress(PG_FUNCTION_ARGS)
Definition: trgm_gist.c:95
OffsetNumber offset
Definition: gist.h:126
long val
Definition: informix.c:689
#define CPTRGM(a, b)
Definition: trgm.h:44