PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
gistxlog.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * gistxlog.c
4  * WAL replay logic for GiST.
5  *
6  *
7  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/gist/gistxlog.c
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/gist_private.h"
17 #include "access/xloginsert.h"
18 #include "access/xlogutils.h"
19 #include "utils/memutils.h"
20 
21 static MemoryContext opCtx; /* working memory for operations */
22 
23 /*
24  * Replay the clearing of F_FOLLOW_RIGHT flag on a child page.
25  *
26  * Even if the WAL record includes a full-page image, we have to update the
27  * follow-right flag, because that change is not included in the full-page
28  * image. To be sure that the intermediate state with the wrong flag value is
29  * not visible to concurrent Hot Standby queries, this function handles
30  * restoring the full-page image as well as updating the flag. (Note that
31  * we never need to do anything else to the child page in the current WAL
32  * action.)
33  */
34 static void
36 {
37  XLogRecPtr lsn = record->EndRecPtr;
38  Buffer buffer;
39  Page page;
40  XLogRedoAction action;
41 
42  /*
43  * Note that we still update the page even if it was restored from a full
44  * page image, because the updated NSN is not included in the image.
45  */
46  action = XLogReadBufferForRedo(record, block_id, &buffer);
47  if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
48  {
49  page = BufferGetPage(buffer);
50 
51  GistPageSetNSN(page, lsn);
53 
54  PageSetLSN(page, lsn);
55  MarkBufferDirty(buffer);
56  }
57  if (BufferIsValid(buffer))
58  UnlockReleaseBuffer(buffer);
59 }
60 
61 /*
62  * redo any page update (except page split)
63  */
64 static void
66 {
67  XLogRecPtr lsn = record->EndRecPtr;
69  Buffer buffer;
70  Page page;
71 
72  if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
73  {
74  char *begin;
75  char *data;
76  Size datalen;
77  int ninserted = 0;
78 
79  data = begin = XLogRecGetBlockData(record, 0, &datalen);
80 
81  page = (Page) BufferGetPage(buffer);
82 
83  /* Delete old tuples */
84  if (xldata->ntodelete > 0)
85  {
86  OffsetNumber *todelete = (OffsetNumber *) data;
87 
88  data += sizeof(OffsetNumber) * xldata->ntodelete;
89 
90  PageIndexMultiDelete(page, todelete, xldata->ntodelete);
91  if (GistPageIsLeaf(page))
93  }
94 
95  /* add tuples */
96  if (data - begin < datalen)
97  {
100 
101  while (data - begin < datalen)
102  {
103  IndexTuple itup = (IndexTuple) data;
104  Size sz = IndexTupleSize(itup);
105  OffsetNumber l;
106 
107  data += sz;
108 
109  l = PageAddItem(page, (Item) itup, sz, off, false, false);
110  if (l == InvalidOffsetNumber)
111  elog(ERROR, "failed to add item to GiST index page, size %d bytes",
112  (int) sz);
113  off++;
114  ninserted++;
115  }
116  }
117 
118  Assert(ninserted == xldata->ntoinsert);
119 
120  PageSetLSN(page, lsn);
121  MarkBufferDirty(buffer);
122  }
123 
124  /*
125  * Fix follow-right data on left child page
126  *
127  * This must be done while still holding the lock on the target page. Note
128  * that even if the target page no longer exists, we still attempt to
129  * replay the change on the child page.
130  */
131  if (XLogRecHasBlockRef(record, 1))
132  gistRedoClearFollowRight(record, 1);
133 
134  if (BufferIsValid(buffer))
135  UnlockReleaseBuffer(buffer);
136 }
137 
138 /*
139  * Returns an array of index pointers.
140  */
141 static IndexTuple *
142 decodePageSplitRecord(char *begin, int len, int *n)
143 {
144  char *ptr;
145  int i = 0;
146  IndexTuple *tuples;
147 
148  /* extract the number of tuples */
149  memcpy(n, begin, sizeof(int));
150  ptr = begin + sizeof(int);
151 
152  tuples = palloc(*n * sizeof(IndexTuple));
153 
154  for (i = 0; i < *n; i++)
155  {
156  Assert(ptr - begin < len);
157  tuples[i] = (IndexTuple) ptr;
158  ptr += IndexTupleSize((IndexTuple) ptr);
159  }
160  Assert(ptr - begin == len);
161 
162  return tuples;
163 }
164 
165 static void
167 {
168  XLogRecPtr lsn = record->EndRecPtr;
169  gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record);
170  Buffer firstbuffer = InvalidBuffer;
171  Buffer buffer;
172  Page page;
173  int i;
174  bool isrootsplit = false;
175 
176  /*
177  * We must hold lock on the first-listed page throughout the action,
178  * including while updating the left child page (if any). We can unlock
179  * remaining pages in the list as soon as they've been written, because
180  * there is no path for concurrent queries to reach those pages without
181  * first visiting the first-listed page.
182  */
183 
184  /* loop around all pages */
185  for (i = 0; i < xldata->npage; i++)
186  {
187  int flags;
188  char *data;
189  Size datalen;
190  int num;
192  IndexTuple *tuples;
193 
194  XLogRecGetBlockTag(record, i + 1, NULL, NULL, &blkno);
195  if (blkno == GIST_ROOT_BLKNO)
196  {
197  Assert(i == 0);
198  isrootsplit = true;
199  }
200 
201  buffer = XLogInitBufferForRedo(record, i + 1);
202  page = (Page) BufferGetPage(buffer);
203  data = XLogRecGetBlockData(record, i + 1, &datalen);
204 
205  tuples = decodePageSplitRecord(data, datalen, &num);
206 
207  /* ok, clear buffer */
208  if (xldata->origleaf && blkno != GIST_ROOT_BLKNO)
209  flags = F_LEAF;
210  else
211  flags = 0;
212  GISTInitBuffer(buffer, flags);
213 
214  /* and fill it */
215  gistfillbuffer(page, tuples, num, FirstOffsetNumber);
216 
217  if (blkno == GIST_ROOT_BLKNO)
218  {
219  GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
220  GistPageSetNSN(page, xldata->orignsn);
221  GistClearFollowRight(page);
222  }
223  else
224  {
225  if (i < xldata->npage - 1)
226  {
227  BlockNumber nextblkno;
228 
229  XLogRecGetBlockTag(record, i + 2, NULL, NULL, &nextblkno);
230  GistPageGetOpaque(page)->rightlink = nextblkno;
231  }
232  else
233  GistPageGetOpaque(page)->rightlink = xldata->origrlink;
234  GistPageSetNSN(page, xldata->orignsn);
235  if (i < xldata->npage - 1 && !isrootsplit &&
236  xldata->markfollowright)
237  GistMarkFollowRight(page);
238  else
239  GistClearFollowRight(page);
240  }
241 
242  PageSetLSN(page, lsn);
243  MarkBufferDirty(buffer);
244 
245  if (i == 0)
246  firstbuffer = buffer;
247  else
248  UnlockReleaseBuffer(buffer);
249  }
250 
251  /* Fix follow-right data on left child page, if any */
252  if (XLogRecHasBlockRef(record, 0))
253  gistRedoClearFollowRight(record, 0);
254 
255  /* Finally, release lock on the first page */
256  UnlockReleaseBuffer(firstbuffer);
257 }
258 
259 static void
261 {
262  XLogRecPtr lsn = record->EndRecPtr;
263  Buffer buffer;
264  Page page;
265 
266  buffer = XLogInitBufferForRedo(record, 0);
268  page = (Page) BufferGetPage(buffer);
269 
270  GISTInitBuffer(buffer, F_LEAF);
271 
272  PageSetLSN(page, lsn);
273 
274  MarkBufferDirty(buffer);
275  UnlockReleaseBuffer(buffer);
276 }
277 
278 void
280 {
281  uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
282  MemoryContext oldCxt;
283 
284  /*
285  * GiST indexes do not require any conflict processing. NB: If we ever
286  * implement a similar optimization we have in b-tree, and remove killed
287  * tuples outside VACUUM, we'll need to handle that here.
288  */
289 
290  oldCxt = MemoryContextSwitchTo(opCtx);
291  switch (info)
292  {
294  gistRedoPageUpdateRecord(record);
295  break;
297  gistRedoPageSplitRecord(record);
298  break;
300  gistRedoCreateIndex(record);
301  break;
302  default:
303  elog(PANIC, "gist_redo: unknown op code %u", info);
304  }
305 
306  MemoryContextSwitchTo(oldCxt);
307  MemoryContextReset(opCtx);
308 }
309 
310 void
312 {
313  opCtx = createTempGistContext();
314 }
315 
316 void
318 {
319  MemoryContextDelete(opCtx);
320 }
321 
322 /*
323  * Write WAL record of a page split.
324  */
326 gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
327  SplitedPageLayout *dist,
328  BlockNumber origrlink, GistNSN orignsn,
329  Buffer leftchildbuf, bool markfollowright)
330 {
331  gistxlogPageSplit xlrec;
332  SplitedPageLayout *ptr;
333  int npage = 0;
334  XLogRecPtr recptr;
335  int i;
336 
337  for (ptr = dist; ptr; ptr = ptr->next)
338  npage++;
339 
340  xlrec.origrlink = origrlink;
341  xlrec.orignsn = orignsn;
342  xlrec.origleaf = page_is_leaf;
343  xlrec.npage = (uint16) npage;
344  xlrec.markfollowright = markfollowright;
345 
346  XLogBeginInsert();
347 
348  /*
349  * Include a full page image of the child buf. (only necessary if a
350  * checkpoint happened since the child page was split)
351  */
352  if (BufferIsValid(leftchildbuf))
353  XLogRegisterBuffer(0, leftchildbuf, REGBUF_STANDARD);
354 
355  /*
356  * NOTE: We register a lot of data. The caller must've called
357  * XLogEnsureRecordSpace() to prepare for that. We cannot do it here,
358  * because we're already in a critical section. If you change the number
359  * of buffer or data registrations here, make sure you modify the
360  * XLogEnsureRecordSpace() calls accordingly!
361  */
362  XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageSplit));
363 
364  i = 1;
365  for (ptr = dist; ptr; ptr = ptr->next)
366  {
368  XLogRegisterBufData(i, (char *) &(ptr->block.num), sizeof(int));
369  XLogRegisterBufData(i, (char *) ptr->list, ptr->lenlist);
370  i++;
371  }
372 
373  recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT);
374 
375  return recptr;
376 }
377 
378 /*
379  * Write XLOG record describing a page update. The update can include any
380  * number of deletions and/or insertions of tuples on a single index page.
381  *
382  * If this update inserts a downlink for a split page, also record that
383  * the F_FOLLOW_RIGHT flag on the child page is cleared and NSN set.
384  *
385  * Note that both the todelete array and the tuples are marked as belonging
386  * to the target buffer; they need not be stored in XLOG if XLogInsert decides
387  * to log the whole buffer contents instead.
388  */
391  OffsetNumber *todelete, int ntodelete,
392  IndexTuple *itup, int ituplen,
393  Buffer leftchildbuf)
394 {
395  gistxlogPageUpdate xlrec;
396  int i;
397  XLogRecPtr recptr;
398 
399  xlrec.ntodelete = ntodelete;
400  xlrec.ntoinsert = ituplen;
401 
402  XLogBeginInsert();
403  XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageUpdate));
404 
406  XLogRegisterBufData(0, (char *) todelete, sizeof(OffsetNumber) * ntodelete);
407 
408  /* new tuples */
409  for (i = 0; i < ituplen; i++)
410  XLogRegisterBufData(0, (char *) (itup[i]), IndexTupleSize(itup[i]));
411 
412  /*
413  * Include a full page image of the child buf. (only necessary if a
414  * checkpoint happened since the child page was split)
415  */
416  if (BufferIsValid(leftchildbuf))
417  XLogRegisterBuffer(1, leftchildbuf, REGBUF_STANDARD);
418 
419  recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE);
420 
421  return recptr;
422 }
void XLogRegisterBufData(uint8 block_id, char *data, int len)
Definition: xloginsert.c:361
static void gistRedoPageUpdateRecord(XLogReaderState *record)
Definition: gistxlog.c:65
#define XLOG_GIST_CREATE_INDEX
Definition: gist_private.h:191
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:203
#define PageIsEmpty(page)
Definition: bufpage.h:219
OffsetNumber PageAddItem(Page page, Item item, Size size, OffsetNumber offsetNumber, bool overwrite, bool is_heap)
Definition: bufpage.c:350
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1445
MemoryContext createTempGistContext(void)
Definition: gist.c:103
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:213
#define GistMarkTuplesDeleted(page)
Definition: gist.h:140
void gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)
Definition: gistutil.c:29
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define XLOG_GIST_PAGE_SPLIT
Definition: gist_private.h:189
unsigned char uint8
Definition: c.h:263
Pointer Item
Definition: item.h:17
#define GistPageSetNSN(page, val)
Definition: gist.h:152
#define InvalidBuffer
Definition: buf.h:25
#define REGBUF_WILL_INIT
Definition: xloginsert.h:32
#define XLogRecHasBlockRef(decoder, block_id)
Definition: xlogreader.h:204
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:138
uint32 BlockNumber
Definition: block.h:31
IndexTupleData * list
Definition: gist_private.h:239
#define PANIC
Definition: elog.h:53
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:354
XLogRecPtr EndRecPtr
Definition: xlogreader.h:114
uint16 OffsetNumber
Definition: off.h:24
gistxlogPage block
Definition: gist_private.h:238
unsigned short uint16
Definition: c.h:264
#define XLogRecGetData(decoder)
Definition: xlogreader.h:201
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3315
#define ERROR
Definition: elog.h:43
Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
Definition: xlogutils.c:300
#define FirstOffsetNumber
Definition: off.h:27
IndexTupleData * IndexTuple
Definition: itup.h:53
#define REGBUF_STANDARD
Definition: xloginsert.h:35
static void gistRedoPageSplitRecord(XLogReaderState *record)
Definition: gistxlog.c:166
static IndexTuple * decodePageSplitRecord(char *begin, int len, int *n)
Definition: gistxlog.c:142
BlockNumber origrlink
Definition: gist_private.h:217
struct SplitedPageLayout * next
Definition: gist_private.h:245
#define BufferGetPage(buffer)
Definition: bufmgr.h:174
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:197
#define GistPageIsLeaf(page)
Definition: gist.h:132
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:323
bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1261
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:408
#define GistClearFollowRight(page)
Definition: gist.h:149
#define XLOG_GIST_PAGE_UPDATE
Definition: gist_private.h:187
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:1285
BlockNumber blkno
Definition: gistvacuum.c:105
#define InvalidOffsetNumber
Definition: off.h:26
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:288
#define GistPageGetOpaque(page)
Definition: gist.h:130
#define NULL
Definition: c.h:226
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:667
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
void gist_redo(XLogReaderState *record)
Definition: gistxlog.c:279
void gist_xlog_cleanup(void)
Definition: gistxlog.c:317
static void gistRedoClearFollowRight(XLogReaderState *record, uint8 block_id)
Definition: gistxlog.c:35
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:823
XLogRedoAction
Definition: xlogutils.h:27
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
size_t Size
Definition: c.h:352
#define InvalidBlockNumber
Definition: block.h:33
void gist_xlog_startup(void)
Definition: gistxlog.c:311
#define BufferIsValid(bufnum)
Definition: bufmgr.h:128
#define GistMarkFollowRight(page)
Definition: gist.h:148
XLogRecPtr GistNSN
Definition: gist.h:50
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2588
void * palloc(Size size)
Definition: mcxt.c:894
#define F_LEAF
Definition: gist.h:42
int i
#define GIST_ROOT_BLKNO
Definition: gist_private.h:298
void GISTInitBuffer(Buffer b, uint32 f)
Definition: gistutil.c:697
static MemoryContext opCtx
Definition: gistxlog.c:21
#define elog
Definition: elog.h:218
XLogRecPtr gistXLogUpdate(RelFileNode node, Buffer buffer, OffsetNumber *todelete, int ntodelete, IndexTuple *itup, int ituplen, Buffer leftchildbuf)
Definition: gistxlog.c:390
void XLogBeginInsert(void)
Definition: xloginsert.c:120
#define PageSetLSN(page, lsn)
Definition: bufpage.h:365
int Buffer
Definition: buf.h:23
Pointer Page
Definition: bufpage.h:74
#define IndexTupleSize(itup)
Definition: itup.h:70
static void gistRedoCreateIndex(XLogReaderState *record)
Definition: gistxlog.c:260
XLogRecPtr gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf, SplitedPageLayout *dist, BlockNumber origrlink, GistNSN orignsn, Buffer leftchildbuf, bool markfollowright)
Definition: gistxlog.c:326