Header And Logo

PostgreSQL
| The world's most advanced open source database.

hstore_gist.c

Go to the documentation of this file.
00001 /*
00002  * contrib/hstore/hstore_gist.c
00003  */
00004 #include "postgres.h"
00005 
00006 #include "access/gist.h"
00007 #include "access/itup.h"
00008 #include "access/skey.h"
00009 #include "catalog/pg_type.h"
00010 
00011 #include "crc32.h"
00012 #include "hstore.h"
00013 
00014 /* bigint defines */
00015 #define BITBYTE 8
00016 #define SIGLENINT  4            /* >122 => key will toast, so very slow!!! */
00017 #define SIGLEN  ( sizeof(int)*SIGLENINT )
00018 #define SIGLENBIT (SIGLEN*BITBYTE)
00019 
00020 typedef char BITVEC[SIGLEN];
00021 typedef char *BITVECP;
00022 
00023 #define SIGPTR(x)  ( (BITVECP) ARR_DATA_PTR(x) )
00024 
00025 
00026 #define LOOPBYTE \
00027             for(i=0;i<SIGLEN;i++)
00028 
00029 #define LOOPBIT \
00030             for(i=0;i<SIGLENBIT;i++)
00031 
00032 /* beware of multiple evaluation of arguments to these macros! */
00033 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
00034 #define GETBITBYTE(x,i) ( (*((char*)(x)) >> (i)) & 0x01 )
00035 #define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
00036 #define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
00037 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
00038 #define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
00039 #define HASH(sign, val) SETBIT((sign), HASHVAL(val))
00040 
00041 typedef struct
00042 {
00043     int32       vl_len_;        /* varlena header (do not touch directly!) */
00044     int4        flag;
00045     char        data[1];
00046 } GISTTYPE;
00047 
00048 #define ALLISTRUE       0x04
00049 
00050 #define ISALLTRUE(x)    ( ((GISTTYPE*)x)->flag & ALLISTRUE )
00051 
00052 #define GTHDRSIZE       (VARHDRSZ + sizeof(int4))
00053 #define CALCGTSIZE(flag) ( GTHDRSIZE+(((flag) & ALLISTRUE) ? 0 : SIGLEN) )
00054 
00055 #define GETSIGN(x)      ( (BITVECP)( (char*)x+GTHDRSIZE ) )
00056 
00057 #define SUMBIT(val) (       \
00058     GETBITBYTE((val),0) + \
00059     GETBITBYTE((val),1) + \
00060     GETBITBYTE((val),2) + \
00061     GETBITBYTE((val),3) + \
00062     GETBITBYTE((val),4) + \
00063     GETBITBYTE((val),5) + \
00064     GETBITBYTE((val),6) + \
00065     GETBITBYTE((val),7)   \
00066 )
00067 
00068 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
00069 
00070 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
00071 
00072 PG_FUNCTION_INFO_V1(ghstore_in);
00073 Datum       ghstore_in(PG_FUNCTION_ARGS);
00074 
00075 PG_FUNCTION_INFO_V1(ghstore_out);
00076 Datum       ghstore_out(PG_FUNCTION_ARGS);
00077 
00078 
00079 Datum
00080 ghstore_in(PG_FUNCTION_ARGS)
00081 {
00082     elog(ERROR, "Not implemented");
00083     PG_RETURN_DATUM(0);
00084 }
00085 
00086 Datum
00087 ghstore_out(PG_FUNCTION_ARGS)
00088 {
00089     elog(ERROR, "Not implemented");
00090     PG_RETURN_DATUM(0);
00091 }
00092 
00093 PG_FUNCTION_INFO_V1(ghstore_consistent);
00094 PG_FUNCTION_INFO_V1(ghstore_compress);
00095 PG_FUNCTION_INFO_V1(ghstore_decompress);
00096 PG_FUNCTION_INFO_V1(ghstore_penalty);
00097 PG_FUNCTION_INFO_V1(ghstore_picksplit);
00098 PG_FUNCTION_INFO_V1(ghstore_union);
00099 PG_FUNCTION_INFO_V1(ghstore_same);
00100 
00101 Datum       ghstore_consistent(PG_FUNCTION_ARGS);
00102 Datum       ghstore_compress(PG_FUNCTION_ARGS);
00103 Datum       ghstore_decompress(PG_FUNCTION_ARGS);
00104 Datum       ghstore_penalty(PG_FUNCTION_ARGS);
00105 Datum       ghstore_picksplit(PG_FUNCTION_ARGS);
00106 Datum       ghstore_union(PG_FUNCTION_ARGS);
00107 Datum       ghstore_same(PG_FUNCTION_ARGS);
00108 
00109 Datum
00110 ghstore_compress(PG_FUNCTION_ARGS)
00111 {
00112     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
00113     GISTENTRY  *retval = entry;
00114 
00115     if (entry->leafkey)
00116     {
00117         GISTTYPE   *res = (GISTTYPE *) palloc0(CALCGTSIZE(0));
00118         HStore     *val = DatumGetHStoreP(entry->key);
00119         HEntry     *hsent = ARRPTR(val);
00120         char       *ptr = STRPTR(val);
00121         int         count = HS_COUNT(val);
00122         int         i;
00123 
00124         SET_VARSIZE(res, CALCGTSIZE(0));
00125 
00126         for (i = 0; i < count; ++i)
00127         {
00128             int         h;
00129 
00130             h = crc32_sz((char *) HS_KEY(hsent, ptr, i), HS_KEYLEN(hsent, i));
00131             HASH(GETSIGN(res), h);
00132             if (!HS_VALISNULL(hsent, i))
00133             {
00134                 h = crc32_sz((char *) HS_VAL(hsent, ptr, i), HS_VALLEN(hsent, i));
00135                 HASH(GETSIGN(res), h);
00136             }
00137         }
00138 
00139         retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
00140         gistentryinit(*retval, PointerGetDatum(res),
00141                       entry->rel, entry->page,
00142                       entry->offset,
00143                       FALSE);
00144     }
00145     else if (!ISALLTRUE(DatumGetPointer(entry->key)))
00146     {
00147         int4        i;
00148         GISTTYPE   *res;
00149         BITVECP     sign = GETSIGN(DatumGetPointer(entry->key));
00150 
00151         LOOPBYTE
00152         {
00153             if ((sign[i] & 0xff) != 0xff)
00154                 PG_RETURN_POINTER(retval);
00155         }
00156 
00157         res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE));
00158         SET_VARSIZE(res, CALCGTSIZE(ALLISTRUE));
00159         res->flag = ALLISTRUE;
00160 
00161         retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
00162         gistentryinit(*retval, PointerGetDatum(res),
00163                       entry->rel, entry->page,
00164                       entry->offset,
00165                       FALSE);
00166     }
00167 
00168     PG_RETURN_POINTER(retval);
00169 }
00170 
00171 /*
00172  * Since type ghstore isn't toastable (and doesn't need to be),
00173  * this function can be a no-op.
00174  */
00175 Datum
00176 ghstore_decompress(PG_FUNCTION_ARGS)
00177 {
00178     PG_RETURN_POINTER(PG_GETARG_POINTER(0));
00179 }
00180 
00181 Datum
00182 ghstore_same(PG_FUNCTION_ARGS)
00183 {
00184     GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
00185     GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
00186     bool       *result = (bool *) PG_GETARG_POINTER(2);
00187 
00188     if (ISALLTRUE(a) && ISALLTRUE(b))
00189         *result = true;
00190     else if (ISALLTRUE(a))
00191         *result = false;
00192     else if (ISALLTRUE(b))
00193         *result = false;
00194     else
00195     {
00196         int4        i;
00197         BITVECP     sa = GETSIGN(a),
00198                     sb = GETSIGN(b);
00199 
00200         *result = true;
00201         LOOPBYTE
00202         {
00203             if (sa[i] != sb[i])
00204             {
00205                 *result = false;
00206                 break;
00207             }
00208         }
00209     }
00210     PG_RETURN_POINTER(result);
00211 }
00212 
00213 static int4
00214 sizebitvec(BITVECP sign)
00215 {
00216     int4        size = 0,
00217                 i;
00218 
00219     LOOPBYTE
00220     {
00221         size += SUMBIT(sign);
00222         sign = (BITVECP) (((char *) sign) + 1);
00223     }
00224     return size;
00225 }
00226 
00227 static int
00228 hemdistsign(BITVECP a, BITVECP b)
00229 {
00230     int         i,
00231                 dist = 0;
00232 
00233     LOOPBIT
00234     {
00235         if (GETBIT(a, i) != GETBIT(b, i))
00236             dist++;
00237     }
00238     return dist;
00239 }
00240 
00241 static int
00242 hemdist(GISTTYPE *a, GISTTYPE *b)
00243 {
00244     if (ISALLTRUE(a))
00245     {
00246         if (ISALLTRUE(b))
00247             return 0;
00248         else
00249             return SIGLENBIT - sizebitvec(GETSIGN(b));
00250     }
00251     else if (ISALLTRUE(b))
00252         return SIGLENBIT - sizebitvec(GETSIGN(a));
00253 
00254     return hemdistsign(GETSIGN(a), GETSIGN(b));
00255 }
00256 
00257 static int4
00258 unionkey(BITVECP sbase, GISTTYPE *add)
00259 {
00260     int4        i;
00261     BITVECP     sadd = GETSIGN(add);
00262 
00263     if (ISALLTRUE(add))
00264         return 1;
00265     LOOPBYTE
00266         sbase[i] |= sadd[i];
00267     return 0;
00268 }
00269 
00270 Datum
00271 ghstore_union(PG_FUNCTION_ARGS)
00272 {
00273     GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00274     int4        len = entryvec->n;
00275 
00276     int        *size = (int *) PG_GETARG_POINTER(1);
00277     BITVEC      base;
00278     int4        i;
00279     int4        flag = 0;
00280     GISTTYPE   *result;
00281 
00282     MemSet((void *) base, 0, sizeof(BITVEC));
00283     for (i = 0; i < len; i++)
00284     {
00285         if (unionkey(base, GETENTRY(entryvec, i)))
00286         {
00287             flag = ALLISTRUE;
00288             break;
00289         }
00290     }
00291 
00292     len = CALCGTSIZE(flag);
00293     result = (GISTTYPE *) palloc(len);
00294     SET_VARSIZE(result, len);
00295     result->flag = flag;
00296     if (!ISALLTRUE(result))
00297         memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
00298     *size = len;
00299 
00300     PG_RETURN_POINTER(result);
00301 }
00302 
00303 Datum
00304 ghstore_penalty(PG_FUNCTION_ARGS)
00305 {
00306     GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
00307     GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
00308     float      *penalty = (float *) PG_GETARG_POINTER(2);
00309     GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
00310     GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
00311 
00312     *penalty = hemdist(origval, newval);
00313     PG_RETURN_POINTER(penalty);
00314 }
00315 
00316 
00317 typedef struct
00318 {
00319     OffsetNumber pos;
00320     int4        cost;
00321 } SPLITCOST;
00322 
00323 static int
00324 comparecost(const void *a, const void *b)
00325 {
00326     return ((SPLITCOST *) a)->cost - ((SPLITCOST *) b)->cost;
00327 }
00328 
00329 
00330 Datum
00331 ghstore_picksplit(PG_FUNCTION_ARGS)
00332 {
00333     GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
00334     OffsetNumber maxoff = entryvec->n - 2;
00335 
00336     GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
00337     OffsetNumber k,
00338                 j;
00339     GISTTYPE   *datum_l,
00340                *datum_r;
00341     BITVECP     union_l,
00342                 union_r;
00343     int4        size_alpha,
00344                 size_beta;
00345     int4        size_waste,
00346                 waste = -1;
00347     int4        nbytes;
00348     OffsetNumber seed_1 = 0,
00349                 seed_2 = 0;
00350     OffsetNumber *left,
00351                *right;
00352     BITVECP     ptr;
00353     int         i;
00354     SPLITCOST  *costvector;
00355     GISTTYPE   *_k,
00356                *_j;
00357 
00358     nbytes = (maxoff + 2) * sizeof(OffsetNumber);
00359     v->spl_left = (OffsetNumber *) palloc(nbytes);
00360     v->spl_right = (OffsetNumber *) palloc(nbytes);
00361 
00362     for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
00363     {
00364         _k = GETENTRY(entryvec, k);
00365         for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
00366         {
00367             size_waste = hemdist(_k, GETENTRY(entryvec, j));
00368             if (size_waste > waste)
00369             {
00370                 waste = size_waste;
00371                 seed_1 = k;
00372                 seed_2 = j;
00373             }
00374         }
00375     }
00376 
00377     left = v->spl_left;
00378     v->spl_nleft = 0;
00379     right = v->spl_right;
00380     v->spl_nright = 0;
00381 
00382     if (seed_1 == 0 || seed_2 == 0)
00383     {
00384         seed_1 = 1;
00385         seed_2 = 2;
00386     }
00387 
00388     /* form initial .. */
00389     if (ISALLTRUE(GETENTRY(entryvec, seed_1)))
00390     {
00391         datum_l = (GISTTYPE *) palloc(GTHDRSIZE);
00392         SET_VARSIZE(datum_l, GTHDRSIZE);
00393         datum_l->flag = ALLISTRUE;
00394     }
00395     else
00396     {
00397         datum_l = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
00398         SET_VARSIZE(datum_l, GTHDRSIZE + SIGLEN);
00399         datum_l->flag = 0;
00400         memcpy((void *) GETSIGN(datum_l), (void *) GETSIGN(GETENTRY(entryvec, seed_1)), sizeof(BITVEC))
00401             ;
00402     }
00403     if (ISALLTRUE(GETENTRY(entryvec, seed_2)))
00404     {
00405         datum_r = (GISTTYPE *) palloc(GTHDRSIZE);
00406         SET_VARSIZE(datum_r, GTHDRSIZE);
00407         datum_r->flag = ALLISTRUE;
00408     }
00409     else
00410     {
00411         datum_r = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
00412         SET_VARSIZE(datum_r, GTHDRSIZE + SIGLEN);
00413         datum_r->flag = 0;
00414         memcpy((void *) GETSIGN(datum_r), (void *) GETSIGN(GETENTRY(entryvec, seed_2)), sizeof(BITVEC));
00415     }
00416 
00417     maxoff = OffsetNumberNext(maxoff);
00418     /* sort before ... */
00419     costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
00420     for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
00421     {
00422         costvector[j - 1].pos = j;
00423         _j = GETENTRY(entryvec, j);
00424         size_alpha = hemdist(datum_l, _j);
00425         size_beta = hemdist(datum_r, _j);
00426         costvector[j - 1].cost = abs(size_alpha - size_beta);
00427     }
00428     qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
00429 
00430     union_l = GETSIGN(datum_l);
00431     union_r = GETSIGN(datum_r);
00432 
00433     for (k = 0; k < maxoff; k++)
00434     {
00435         j = costvector[k].pos;
00436         if (j == seed_1)
00437         {
00438             *left++ = j;
00439             v->spl_nleft++;
00440             continue;
00441         }
00442         else if (j == seed_2)
00443         {
00444             *right++ = j;
00445             v->spl_nright++;
00446             continue;
00447         }
00448         _j = GETENTRY(entryvec, j);
00449         size_alpha = hemdist(datum_l, _j);
00450         size_beta = hemdist(datum_r, _j);
00451 
00452         if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.0001))
00453         {
00454             if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
00455             {
00456                 if (!ISALLTRUE(datum_l))
00457                     MemSet((void *) union_l, 0xff, sizeof(BITVEC));
00458             }
00459             else
00460             {
00461                 ptr = GETSIGN(_j);
00462                 LOOPBYTE
00463                     union_l[i] |= ptr[i];
00464             }
00465             *left++ = j;
00466             v->spl_nleft++;
00467         }
00468         else
00469         {
00470             if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
00471             {
00472                 if (!ISALLTRUE(datum_r))
00473                     MemSet((void *) union_r, 0xff, sizeof(BITVEC));
00474             }
00475             else
00476             {
00477                 ptr = GETSIGN(_j);
00478                 LOOPBYTE
00479                     union_r[i] |= ptr[i];
00480             }
00481             *right++ = j;
00482             v->spl_nright++;
00483         }
00484     }
00485 
00486     *right = *left = FirstOffsetNumber;
00487 
00488     v->spl_ldatum = PointerGetDatum(datum_l);
00489     v->spl_rdatum = PointerGetDatum(datum_r);
00490 
00491     PG_RETURN_POINTER(v);
00492 }
00493 
00494 
00495 Datum
00496 ghstore_consistent(PG_FUNCTION_ARGS)
00497 {
00498     GISTTYPE   *entry = (GISTTYPE *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
00499     StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
00500 
00501     /* Oid      subtype = PG_GETARG_OID(3); */
00502     bool       *recheck = (bool *) PG_GETARG_POINTER(4);
00503     bool        res = true;
00504     BITVECP     sign;
00505 
00506     /* All cases served by this function are inexact */
00507     *recheck = true;
00508 
00509     if (ISALLTRUE(entry))
00510         PG_RETURN_BOOL(true);
00511 
00512     sign = GETSIGN(entry);
00513 
00514     if (strategy == HStoreContainsStrategyNumber ||
00515         strategy == HStoreOldContainsStrategyNumber)
00516     {
00517         HStore     *query = PG_GETARG_HS(1);
00518         HEntry     *qe = ARRPTR(query);
00519         char       *qv = STRPTR(query);
00520         int         count = HS_COUNT(query);
00521         int         i;
00522 
00523         for (i = 0; res && i < count; ++i)
00524         {
00525             int         crc = crc32_sz((char *) HS_KEY(qe, qv, i), HS_KEYLEN(qe, i));
00526 
00527             if (GETBIT(sign, HASHVAL(crc)))
00528             {
00529                 if (!HS_VALISNULL(qe, i))
00530                 {
00531                     crc = crc32_sz((char *) HS_VAL(qe, qv, i), HS_VALLEN(qe, i));
00532                     if (!GETBIT(sign, HASHVAL(crc)))
00533                         res = false;
00534                 }
00535             }
00536             else
00537                 res = false;
00538         }
00539     }
00540     else if (strategy == HStoreExistsStrategyNumber)
00541     {
00542         text       *query = PG_GETARG_TEXT_PP(1);
00543         int         crc = crc32_sz(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query));
00544 
00545         res = (GETBIT(sign, HASHVAL(crc))) ? true : false;
00546     }
00547     else if (strategy == HStoreExistsAllStrategyNumber)
00548     {
00549         ArrayType  *query = PG_GETARG_ARRAYTYPE_P(1);
00550         Datum      *key_datums;
00551         bool       *key_nulls;
00552         int         key_count;
00553         int         i;
00554 
00555         deconstruct_array(query,
00556                           TEXTOID, -1, false, 'i',
00557                           &key_datums, &key_nulls, &key_count);
00558 
00559         for (i = 0; res && i < key_count; ++i)
00560         {
00561             int         crc;
00562 
00563             if (key_nulls[i])
00564                 continue;
00565             crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
00566             if (!(GETBIT(sign, HASHVAL(crc))))
00567                 res = FALSE;
00568         }
00569     }
00570     else if (strategy == HStoreExistsAnyStrategyNumber)
00571     {
00572         ArrayType  *query = PG_GETARG_ARRAYTYPE_P(1);
00573         Datum      *key_datums;
00574         bool       *key_nulls;
00575         int         key_count;
00576         int         i;
00577 
00578         deconstruct_array(query,
00579                           TEXTOID, -1, false, 'i',
00580                           &key_datums, &key_nulls, &key_count);
00581 
00582         res = FALSE;
00583 
00584         for (i = 0; !res && i < key_count; ++i)
00585         {
00586             int         crc;
00587 
00588             if (key_nulls[i])
00589                 continue;
00590             crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
00591             if (GETBIT(sign, HASHVAL(crc)))
00592                 res = TRUE;
00593         }
00594     }
00595     else
00596         elog(ERROR, "Unsupported strategy number: %d", strategy);
00597 
00598     PG_RETURN_BOOL(res);
00599 }