PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
nodeHashjoin.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * nodeHashjoin.c
4  * Routines to handle hash join nodes
5  *
6  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/executor/nodeHashjoin.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include "access/htup_details.h"
19 #include "executor/executor.h"
20 #include "executor/hashjoin.h"
21 #include "executor/nodeHash.h"
22 #include "executor/nodeHashjoin.h"
23 #include "miscadmin.h"
24 #include "utils/memutils.h"
25 
26 
27 /*
28  * States of the ExecHashJoin state machine
29  */
30 #define HJ_BUILD_HASHTABLE 1
31 #define HJ_NEED_NEW_OUTER 2
32 #define HJ_SCAN_BUCKET 3
33 #define HJ_FILL_OUTER_TUPLE 4
34 #define HJ_FILL_INNER_TUPLES 5
35 #define HJ_NEED_NEW_BATCH 6
36 
37 /* Returns true if doing null-fill on outer relation */
38 #define HJ_FILL_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL)
39 /* Returns true if doing null-fill on inner relation */
40 #define HJ_FILL_INNER(hjstate) ((hjstate)->hj_NullOuterTupleSlot != NULL)
41 
43  HashJoinState *hjstate,
44  uint32 *hashvalue);
46  BufFile *file,
47  uint32 *hashvalue,
48  TupleTableSlot *tupleSlot);
49 static bool ExecHashJoinNewBatch(HashJoinState *hjstate);
50 
51 
52 /* ----------------------------------------------------------------
53  * ExecHashJoin
54  *
55  * This function implements the Hybrid Hashjoin algorithm.
56  *
57  * Note: the relation we build hash table on is the "inner"
58  * the other one is "outer".
59  * ----------------------------------------------------------------
60  */
61 TupleTableSlot * /* return: a tuple or NULL */
63 {
64  PlanState *outerNode;
65  HashState *hashNode;
66  List *joinqual;
67  List *otherqual;
68  ExprContext *econtext;
69  ExprDoneCond isDone;
70  HashJoinTable hashtable;
71  TupleTableSlot *outerTupleSlot;
72  uint32 hashvalue;
73  int batchno;
74 
75  /*
76  * get information from HashJoin node
77  */
78  joinqual = node->js.joinqual;
79  otherqual = node->js.ps.qual;
80  hashNode = (HashState *) innerPlanState(node);
81  outerNode = outerPlanState(node);
82  hashtable = node->hj_HashTable;
83  econtext = node->js.ps.ps_ExprContext;
84 
85  /*
86  * Check to see if we're still projecting out tuples from a previous join
87  * tuple (because there is a function-returning-set in the projection
88  * expressions). If so, try to project another one.
89  */
90  if (node->js.ps.ps_TupFromTlist)
91  {
92  TupleTableSlot *result;
93 
94  result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
95  if (isDone == ExprMultipleResult)
96  return result;
97  /* Done with that source tuple... */
98  node->js.ps.ps_TupFromTlist = false;
99  }
100 
101  /*
102  * Reset per-tuple memory context to free any expression evaluation
103  * storage allocated in the previous tuple cycle. Note this can't happen
104  * until we're done projecting out tuples from a join tuple.
105  */
106  ResetExprContext(econtext);
107 
108  /*
109  * run the hash join state machine
110  */
111  for (;;)
112  {
113  switch (node->hj_JoinState)
114  {
115  case HJ_BUILD_HASHTABLE:
116 
117  /*
118  * First time through: build hash table for inner relation.
119  */
120  Assert(hashtable == NULL);
121 
122  /*
123  * If the outer relation is completely empty, and it's not
124  * right/full join, we can quit without building the hash
125  * table. However, for an inner join it is only a win to
126  * check this when the outer relation's startup cost is less
127  * than the projected cost of building the hash table.
128  * Otherwise it's best to build the hash table first and see
129  * if the inner relation is empty. (When it's a left join, we
130  * should always make this check, since we aren't going to be
131  * able to skip the join on the strength of an empty inner
132  * relation anyway.)
133  *
134  * If we are rescanning the join, we make use of information
135  * gained on the previous scan: don't bother to try the
136  * prefetch if the previous scan found the outer relation
137  * nonempty. This is not 100% reliable since with new
138  * parameters the outer relation might yield different
139  * results, but it's a good heuristic.
140  *
141  * The only way to make the check is to try to fetch a tuple
142  * from the outer plan node. If we succeed, we have to stash
143  * it away for later consumption by ExecHashJoinOuterGetTuple.
144  */
145  if (HJ_FILL_INNER(node))
146  {
147  /* no chance to not build the hash table */
149  }
150  else if (HJ_FILL_OUTER(node) ||
151  (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
152  !node->hj_OuterNotEmpty))
153  {
154  node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
156  {
157  node->hj_OuterNotEmpty = false;
158  return NULL;
159  }
160  else
161  node->hj_OuterNotEmpty = true;
162  }
163  else
165 
166  /*
167  * create the hash table
168  */
169  hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
170  node->hj_HashOperators,
171  HJ_FILL_INNER(node));
172  node->hj_HashTable = hashtable;
173 
174  /*
175  * execute the Hash node, to build the hash table
176  */
177  hashNode->hashtable = hashtable;
178  (void) MultiExecProcNode((PlanState *) hashNode);
179 
180  /*
181  * If the inner relation is completely empty, and we're not
182  * doing a left outer join, we can quit without scanning the
183  * outer relation.
184  */
185  if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
186  return NULL;
187 
188  /*
189  * need to remember whether nbatch has increased since we
190  * began scanning the outer relation
191  */
192  hashtable->nbatch_outstart = hashtable->nbatch;
193 
194  /*
195  * Reset OuterNotEmpty for scan. (It's OK if we fetched a
196  * tuple above, because ExecHashJoinOuterGetTuple will
197  * immediately set it again.)
198  */
199  node->hj_OuterNotEmpty = false;
200 
202 
203  /* FALL THRU */
204 
205  case HJ_NEED_NEW_OUTER:
206 
207  /*
208  * We don't have an outer tuple, try to get the next one
209  */
210  outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
211  node,
212  &hashvalue);
213  if (TupIsNull(outerTupleSlot))
214  {
215  /* end of batch, or maybe whole join */
216  if (HJ_FILL_INNER(node))
217  {
218  /* set up to scan for unmatched inner tuples */
221  }
222  else
224  continue;
225  }
226 
227  econtext->ecxt_outertuple = outerTupleSlot;
228  node->hj_MatchedOuter = false;
229 
230  /*
231  * Find the corresponding bucket for this tuple in the main
232  * hash table or skew hash table.
233  */
234  node->hj_CurHashValue = hashvalue;
235  ExecHashGetBucketAndBatch(hashtable, hashvalue,
236  &node->hj_CurBucketNo, &batchno);
237  node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
238  hashvalue);
239  node->hj_CurTuple = NULL;
240 
241  /*
242  * The tuple might not belong to the current batch (where
243  * "current batch" includes the skew buckets if any).
244  */
245  if (batchno != hashtable->curbatch &&
247  {
248  /*
249  * Need to postpone this outer tuple to a later batch.
250  * Save it in the corresponding outer-batch file.
251  */
252  Assert(batchno > hashtable->curbatch);
254  hashvalue,
255  &hashtable->outerBatchFile[batchno]);
256  /* Loop around, staying in HJ_NEED_NEW_OUTER state */
257  continue;
258  }
259 
260  /* OK, let's scan the bucket for matches */
262 
263  /* FALL THRU */
264 
265  case HJ_SCAN_BUCKET:
266 
267  /*
268  * We check for interrupts here because this corresponds to
269  * where we'd fetch a row from a child plan node in other join
270  * types.
271  */
273 
274  /*
275  * Scan the selected hash bucket for matches to current outer
276  */
277  if (!ExecScanHashBucket(node, econtext))
278  {
279  /* out of matches; check for possible outer-join fill */
281  continue;
282  }
283 
284  /*
285  * We've got a match, but still need to test non-hashed quals.
286  * ExecScanHashBucket already set up all the state needed to
287  * call ExecQual.
288  *
289  * If we pass the qual, then save state for next call and have
290  * ExecProject form the projection, store it in the tuple
291  * table, and return the slot.
292  *
293  * Only the joinquals determine tuple match status, but all
294  * quals must pass to actually return the tuple.
295  */
296  if (joinqual == NIL || ExecQual(joinqual, econtext, false))
297  {
298  node->hj_MatchedOuter = true;
300 
301  /* In an antijoin, we never return a matched tuple */
302  if (node->js.jointype == JOIN_ANTI)
303  {
305  continue;
306  }
307 
308  /*
309  * In a semijoin, we'll consider returning the first
310  * match, but after that we're done with this outer tuple.
311  */
312  if (node->js.jointype == JOIN_SEMI)
314 
315  if (otherqual == NIL ||
316  ExecQual(otherqual, econtext, false))
317  {
318  TupleTableSlot *result;
319 
320  result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
321 
322  if (isDone != ExprEndResult)
323  {
324  node->js.ps.ps_TupFromTlist =
325  (isDone == ExprMultipleResult);
326  return result;
327  }
328  }
329  else
330  InstrCountFiltered2(node, 1);
331  }
332  else
333  InstrCountFiltered1(node, 1);
334  break;
335 
336  case HJ_FILL_OUTER_TUPLE:
337 
338  /*
339  * The current outer tuple has run out of matches, so check
340  * whether to emit a dummy outer-join tuple. Whether we emit
341  * one or not, the next state is NEED_NEW_OUTER.
342  */
344 
345  if (!node->hj_MatchedOuter &&
346  HJ_FILL_OUTER(node))
347  {
348  /*
349  * Generate a fake join tuple with nulls for the inner
350  * tuple, and return it if it passes the non-join quals.
351  */
352  econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
353 
354  if (otherqual == NIL ||
355  ExecQual(otherqual, econtext, false))
356  {
357  TupleTableSlot *result;
358 
359  result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
360 
361  if (isDone != ExprEndResult)
362  {
363  node->js.ps.ps_TupFromTlist =
364  (isDone == ExprMultipleResult);
365  return result;
366  }
367  }
368  else
369  InstrCountFiltered2(node, 1);
370  }
371  break;
372 
374 
375  /*
376  * We have finished a batch, but we are doing right/full join,
377  * so any unmatched inner tuples in the hashtable have to be
378  * emitted before we continue to the next batch.
379  */
380  if (!ExecScanHashTableForUnmatched(node, econtext))
381  {
382  /* no more unmatched tuples */
384  continue;
385  }
386 
387  /*
388  * Generate a fake join tuple with nulls for the outer tuple,
389  * and return it if it passes the non-join quals.
390  */
391  econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
392 
393  if (otherqual == NIL ||
394  ExecQual(otherqual, econtext, false))
395  {
396  TupleTableSlot *result;
397 
398  result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
399 
400  if (isDone != ExprEndResult)
401  {
402  node->js.ps.ps_TupFromTlist =
403  (isDone == ExprMultipleResult);
404  return result;
405  }
406  }
407  else
408  InstrCountFiltered2(node, 1);
409  break;
410 
411  case HJ_NEED_NEW_BATCH:
412 
413  /*
414  * Try to advance to next batch. Done if there are no more.
415  */
416  if (!ExecHashJoinNewBatch(node))
417  return NULL; /* end of join */
419  break;
420 
421  default:
422  elog(ERROR, "unrecognized hashjoin state: %d",
423  (int) node->hj_JoinState);
424  }
425  }
426 }
427 
428 /* ----------------------------------------------------------------
429  * ExecInitHashJoin
430  *
431  * Init routine for HashJoin node.
432  * ----------------------------------------------------------------
433  */
435 ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
436 {
437  HashJoinState *hjstate;
438  Plan *outerNode;
439  Hash *hashNode;
440  List *lclauses;
441  List *rclauses;
442  List *hoperators;
443  ListCell *l;
444 
445  /* check for unsupported flags */
446  Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
447 
448  /*
449  * create state structure
450  */
451  hjstate = makeNode(HashJoinState);
452  hjstate->js.ps.plan = (Plan *) node;
453  hjstate->js.ps.state = estate;
454 
455  /*
456  * Miscellaneous initialization
457  *
458  * create expression context for node
459  */
460  ExecAssignExprContext(estate, &hjstate->js.ps);
461 
462  /*
463  * initialize child expressions
464  */
465  hjstate->js.ps.targetlist = (List *)
467  (PlanState *) hjstate);
468  hjstate->js.ps.qual = (List *)
469  ExecInitExpr((Expr *) node->join.plan.qual,
470  (PlanState *) hjstate);
471  hjstate->js.jointype = node->join.jointype;
472  hjstate->js.joinqual = (List *)
473  ExecInitExpr((Expr *) node->join.joinqual,
474  (PlanState *) hjstate);
475  hjstate->hashclauses = (List *)
476  ExecInitExpr((Expr *) node->hashclauses,
477  (PlanState *) hjstate);
478 
479  /*
480  * initialize child nodes
481  *
482  * Note: we could suppress the REWIND flag for the inner input, which
483  * would amount to betting that the hash will be a single batch. Not
484  * clear if this would be a win or not.
485  */
486  outerNode = outerPlan(node);
487  hashNode = (Hash *) innerPlan(node);
488 
489  outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
490  innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
491 
492  /*
493  * tuple table initialization
494  */
495  ExecInitResultTupleSlot(estate, &hjstate->js.ps);
496  hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
497 
498  /* set up null tuples for outer joins, if needed */
499  switch (node->join.jointype)
500  {
501  case JOIN_INNER:
502  case JOIN_SEMI:
503  break;
504  case JOIN_LEFT:
505  case JOIN_ANTI:
506  hjstate->hj_NullInnerTupleSlot =
507  ExecInitNullTupleSlot(estate,
509  break;
510  case JOIN_RIGHT:
511  hjstate->hj_NullOuterTupleSlot =
512  ExecInitNullTupleSlot(estate,
514  break;
515  case JOIN_FULL:
516  hjstate->hj_NullOuterTupleSlot =
517  ExecInitNullTupleSlot(estate,
519  hjstate->hj_NullInnerTupleSlot =
520  ExecInitNullTupleSlot(estate,
522  break;
523  default:
524  elog(ERROR, "unrecognized join type: %d",
525  (int) node->join.jointype);
526  }
527 
528  /*
529  * now for some voodoo. our temporary tuple slot is actually the result
530  * tuple slot of the Hash node (which is our inner plan). we can do this
531  * because Hash nodes don't return tuples via ExecProcNode() -- instead
532  * the hash join node uses ExecScanHashBucket() to get at the contents of
533  * the hash table. -cim 6/9/91
534  */
535  {
536  HashState *hashstate = (HashState *) innerPlanState(hjstate);
537  TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
538 
539  hjstate->hj_HashTupleSlot = slot;
540  }
541 
542  /*
543  * initialize tuple type and projection info
544  */
545  ExecAssignResultTypeFromTL(&hjstate->js.ps);
546  ExecAssignProjectionInfo(&hjstate->js.ps, NULL);
547 
550 
551  /*
552  * initialize hash-specific info
553  */
554  hjstate->hj_HashTable = NULL;
555  hjstate->hj_FirstOuterTupleSlot = NULL;
556 
557  hjstate->hj_CurHashValue = 0;
558  hjstate->hj_CurBucketNo = 0;
560  hjstate->hj_CurTuple = NULL;
561 
562  /*
563  * Deconstruct the hash clauses into outer and inner argument values, so
564  * that we can evaluate those subexpressions separately. Also make a list
565  * of the hash operator OIDs, in preparation for looking up the hash
566  * functions to use.
567  */
568  lclauses = NIL;
569  rclauses = NIL;
570  hoperators = NIL;
571  foreach(l, hjstate->hashclauses)
572  {
573  FuncExprState *fstate = (FuncExprState *) lfirst(l);
574  OpExpr *hclause;
575 
576  Assert(IsA(fstate, FuncExprState));
577  hclause = (OpExpr *) fstate->xprstate.expr;
578  Assert(IsA(hclause, OpExpr));
579  lclauses = lappend(lclauses, linitial(fstate->args));
580  rclauses = lappend(rclauses, lsecond(fstate->args));
581  hoperators = lappend_oid(hoperators, hclause->opno);
582  }
583  hjstate->hj_OuterHashKeys = lclauses;
584  hjstate->hj_InnerHashKeys = rclauses;
585  hjstate->hj_HashOperators = hoperators;
586  /* child Hash node needs to evaluate inner hash keys, too */
587  ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
588 
589  hjstate->js.ps.ps_TupFromTlist = false;
590  hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
591  hjstate->hj_MatchedOuter = false;
592  hjstate->hj_OuterNotEmpty = false;
593 
594  return hjstate;
595 }
596 
597 /* ----------------------------------------------------------------
598  * ExecEndHashJoin
599  *
600  * clean up routine for HashJoin node
601  * ----------------------------------------------------------------
602  */
603 void
605 {
606  /*
607  * Free hash table
608  */
609  if (node->hj_HashTable)
610  {
612  node->hj_HashTable = NULL;
613  }
614 
615  /*
616  * Free the exprcontext
617  */
618  ExecFreeExprContext(&node->js.ps);
619 
620  /*
621  * clean out the tuple table
622  */
626 
627  /*
628  * clean up subtrees
629  */
632 }
633 
634 /*
635  * ExecHashJoinOuterGetTuple
636  *
637  * get the next outer tuple for hashjoin: either by
638  * executing the outer plan node in the first pass, or from
639  * the temp files for the hashjoin batches.
640  *
641  * Returns a null slot if no more outer tuples (within the current batch).
642  *
643  * On success, the tuple's hash value is stored at *hashvalue --- this is
644  * either originally computed, or re-read from the temp file.
645  */
646 static TupleTableSlot *
648  HashJoinState *hjstate,
649  uint32 *hashvalue)
650 {
651  HashJoinTable hashtable = hjstate->hj_HashTable;
652  int curbatch = hashtable->curbatch;
653  TupleTableSlot *slot;
654 
655  if (curbatch == 0) /* if it is the first pass */
656  {
657  /*
658  * Check to see if first outer tuple was already fetched by
659  * ExecHashJoin() and not used yet.
660  */
661  slot = hjstate->hj_FirstOuterTupleSlot;
662  if (!TupIsNull(slot))
663  hjstate->hj_FirstOuterTupleSlot = NULL;
664  else
665  slot = ExecProcNode(outerNode);
666 
667  while (!TupIsNull(slot))
668  {
669  /*
670  * We have to compute the tuple's hash value.
671  */
672  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
673 
674  econtext->ecxt_outertuple = slot;
675  if (ExecHashGetHashValue(hashtable, econtext,
676  hjstate->hj_OuterHashKeys,
677  true, /* outer tuple */
678  HJ_FILL_OUTER(hjstate),
679  hashvalue))
680  {
681  /* remember outer relation is not empty for possible rescan */
682  hjstate->hj_OuterNotEmpty = true;
683 
684  return slot;
685  }
686 
687  /*
688  * That tuple couldn't match because of a NULL, so discard it and
689  * continue with the next one.
690  */
691  slot = ExecProcNode(outerNode);
692  }
693  }
694  else if (curbatch < hashtable->nbatch)
695  {
696  BufFile *file = hashtable->outerBatchFile[curbatch];
697 
698  /*
699  * In outer-join cases, we could get here even though the batch file
700  * is empty.
701  */
702  if (file == NULL)
703  return NULL;
704 
705  slot = ExecHashJoinGetSavedTuple(hjstate,
706  file,
707  hashvalue,
708  hjstate->hj_OuterTupleSlot);
709  if (!TupIsNull(slot))
710  return slot;
711  }
712 
713  /* End of this batch */
714  return NULL;
715 }
716 
717 /*
718  * ExecHashJoinNewBatch
719  * switch to a new hashjoin batch
720  *
721  * Returns true if successful, false if there are no more batches.
722  */
723 static bool
725 {
726  HashJoinTable hashtable = hjstate->hj_HashTable;
727  int nbatch;
728  int curbatch;
729  BufFile *innerFile;
730  TupleTableSlot *slot;
731  uint32 hashvalue;
732 
733  nbatch = hashtable->nbatch;
734  curbatch = hashtable->curbatch;
735 
736  if (curbatch > 0)
737  {
738  /*
739  * We no longer need the previous outer batch file; close it right
740  * away to free disk space.
741  */
742  if (hashtable->outerBatchFile[curbatch])
743  BufFileClose(hashtable->outerBatchFile[curbatch]);
744  hashtable->outerBatchFile[curbatch] = NULL;
745  }
746  else /* we just finished the first batch */
747  {
748  /*
749  * Reset some of the skew optimization state variables, since we no
750  * longer need to consider skew tuples after the first batch. The
751  * memory context reset we are about to do will release the skew
752  * hashtable itself.
753  */
754  hashtable->skewEnabled = false;
755  hashtable->skewBucket = NULL;
756  hashtable->skewBucketNums = NULL;
757  hashtable->nSkewBuckets = 0;
758  hashtable->spaceUsedSkew = 0;
759  }
760 
761  /*
762  * We can always skip over any batches that are completely empty on both
763  * sides. We can sometimes skip over batches that are empty on only one
764  * side, but there are exceptions:
765  *
766  * 1. In a left/full outer join, we have to process outer batches even if
767  * the inner batch is empty. Similarly, in a right/full outer join, we
768  * have to process inner batches even if the outer batch is empty.
769  *
770  * 2. If we have increased nbatch since the initial estimate, we have to
771  * scan inner batches since they might contain tuples that need to be
772  * reassigned to later inner batches.
773  *
774  * 3. Similarly, if we have increased nbatch since starting the outer
775  * scan, we have to rescan outer batches in case they contain tuples that
776  * need to be reassigned.
777  */
778  curbatch++;
779  while (curbatch < nbatch &&
780  (hashtable->outerBatchFile[curbatch] == NULL ||
781  hashtable->innerBatchFile[curbatch] == NULL))
782  {
783  if (hashtable->outerBatchFile[curbatch] &&
784  HJ_FILL_OUTER(hjstate))
785  break; /* must process due to rule 1 */
786  if (hashtable->innerBatchFile[curbatch] &&
787  HJ_FILL_INNER(hjstate))
788  break; /* must process due to rule 1 */
789  if (hashtable->innerBatchFile[curbatch] &&
790  nbatch != hashtable->nbatch_original)
791  break; /* must process due to rule 2 */
792  if (hashtable->outerBatchFile[curbatch] &&
793  nbatch != hashtable->nbatch_outstart)
794  break; /* must process due to rule 3 */
795  /* We can ignore this batch. */
796  /* Release associated temp files right away. */
797  if (hashtable->innerBatchFile[curbatch])
798  BufFileClose(hashtable->innerBatchFile[curbatch]);
799  hashtable->innerBatchFile[curbatch] = NULL;
800  if (hashtable->outerBatchFile[curbatch])
801  BufFileClose(hashtable->outerBatchFile[curbatch]);
802  hashtable->outerBatchFile[curbatch] = NULL;
803  curbatch++;
804  }
805 
806  if (curbatch >= nbatch)
807  return false; /* no more batches */
808 
809  hashtable->curbatch = curbatch;
810 
811  /*
812  * Reload the hash table with the new inner batch (which could be empty)
813  */
814  ExecHashTableReset(hashtable);
815 
816  innerFile = hashtable->innerBatchFile[curbatch];
817 
818  if (innerFile != NULL)
819  {
820  if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
821  ereport(ERROR,
823  errmsg("could not rewind hash-join temporary file: %m")));
824 
825  while ((slot = ExecHashJoinGetSavedTuple(hjstate,
826  innerFile,
827  &hashvalue,
828  hjstate->hj_HashTupleSlot)))
829  {
830  /*
831  * NOTE: some tuples may be sent to future batches. Also, it is
832  * possible for hashtable->nbatch to be increased here!
833  */
834  ExecHashTableInsert(hashtable, slot, hashvalue);
835  }
836 
837  /*
838  * after we build the hash table, the inner batch file is no longer
839  * needed
840  */
841  BufFileClose(innerFile);
842  hashtable->innerBatchFile[curbatch] = NULL;
843  }
844 
845  /*
846  * Rewind outer batch file (if present), so that we can start reading it.
847  */
848  if (hashtable->outerBatchFile[curbatch] != NULL)
849  {
850  if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
851  ereport(ERROR,
853  errmsg("could not rewind hash-join temporary file: %m")));
854  }
855 
856  return true;
857 }
858 
859 /*
860  * ExecHashJoinSaveTuple
861  * save a tuple to a batch file.
862  *
863  * The data recorded in the file for each tuple is its hash value,
864  * then the tuple in MinimalTuple format.
865  *
866  * Note: it is important always to call this in the regular executor
867  * context, not in a shorter-lived context; else the temp file buffers
868  * will get messed up.
869  */
870 void
872  BufFile **fileptr)
873 {
874  BufFile *file = *fileptr;
875  size_t written;
876 
877  if (file == NULL)
878  {
879  /* First write to this batch file, so open it. */
880  file = BufFileCreateTemp(false);
881  *fileptr = file;
882  }
883 
884  written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));
885  if (written != sizeof(uint32))
886  ereport(ERROR,
888  errmsg("could not write to hash-join temporary file: %m")));
889 
890  written = BufFileWrite(file, (void *) tuple, tuple->t_len);
891  if (written != tuple->t_len)
892  ereport(ERROR,
894  errmsg("could not write to hash-join temporary file: %m")));
895 }
896 
897 /*
898  * ExecHashJoinGetSavedTuple
899  * read the next tuple from a batch file. Return NULL if no more.
900  *
901  * On success, *hashvalue is set to the tuple's hash value, and the tuple
902  * itself is stored in the given slot.
903  */
904 static TupleTableSlot *
906  BufFile *file,
907  uint32 *hashvalue,
908  TupleTableSlot *tupleSlot)
909 {
910  uint32 header[2];
911  size_t nread;
912  MinimalTuple tuple;
913 
914  /*
915  * Since both the hash value and the MinimalTuple length word are uint32,
916  * we can read them both in one BufFileRead() call without any type
917  * cheating.
918  */
919  nread = BufFileRead(file, (void *) header, sizeof(header));
920  if (nread == 0) /* end of file */
921  {
922  ExecClearTuple(tupleSlot);
923  return NULL;
924  }
925  if (nread != sizeof(header))
926  ereport(ERROR,
928  errmsg("could not read from hash-join temporary file: %m")));
929  *hashvalue = header[0];
930  tuple = (MinimalTuple) palloc(header[1]);
931  tuple->t_len = header[1];
932  nread = BufFileRead(file,
933  (void *) ((char *) tuple + sizeof(uint32)),
934  header[1] - sizeof(uint32));
935  if (nread != header[1] - sizeof(uint32))
936  ereport(ERROR,
938  errmsg("could not read from hash-join temporary file: %m")));
939  return ExecStoreMinimalTuple(tuple, tupleSlot, true);
940 }
941 
942 
943 void
945 {
946  /*
947  * In a multi-batch join, we currently have to do rescans the hard way,
948  * primarily because batch temp files may have already been released. But
949  * if it's a single-batch join, and there is no parameter change for the
950  * inner subnode, then we can just re-use the existing hash table without
951  * rebuilding it.
952  */
953  if (node->hj_HashTable != NULL)
954  {
955  if (node->hj_HashTable->nbatch == 1 &&
956  node->js.ps.righttree->chgParam == NULL)
957  {
958  /*
959  * Okay to reuse the hash table; needn't rescan inner, either.
960  *
961  * However, if it's a right/full join, we'd better reset the
962  * inner-tuple match flags contained in the table.
963  */
964  if (HJ_FILL_INNER(node))
966 
967  /*
968  * Also, we need to reset our state about the emptiness of the
969  * outer relation, so that the new scan of the outer will update
970  * it correctly if it turns out to be empty this time. (There's no
971  * harm in clearing it now because ExecHashJoin won't need the
972  * info. In the other cases, where the hash table doesn't exist
973  * or we are destroying it, we leave this state alone because
974  * ExecHashJoin will need it the first time through.)
975  */
976  node->hj_OuterNotEmpty = false;
977 
978  /* ExecHashJoin can skip the BUILD_HASHTABLE step */
980  }
981  else
982  {
983  /* must destroy and rebuild hash table */
985  node->hj_HashTable = NULL;
987 
988  /*
989  * if chgParam of subnode is not null then plan will be re-scanned
990  * by first ExecProcNode.
991  */
992  if (node->js.ps.righttree->chgParam == NULL)
993  ExecReScan(node->js.ps.righttree);
994  }
995  }
996 
997  /* Always reset intra-tuple state */
998  node->hj_CurHashValue = 0;
999  node->hj_CurBucketNo = 0;
1001  node->hj_CurTuple = NULL;
1002 
1003  node->js.ps.ps_TupFromTlist = false;
1004  node->hj_MatchedOuter = false;
1005  node->hj_FirstOuterTupleSlot = NULL;
1006 
1007  /*
1008  * if chgParam of subnode is not null then plan will be re-scanned by
1009  * first ExecProcNode.
1010  */
1011  if (node->js.ps.lefttree->chgParam == NULL)
1012  ExecReScan(node->js.ps.lefttree);
1013 }
JoinType jointype
Definition: execnodes.h:1634
#define NIL
Definition: pg_list.h:69
#define HJ_NEED_NEW_BATCH
Definition: nodeHashjoin.c:35
List * qual
Definition: plannodes.h:122
#define INVALID_SKEW_BUCKET_NO
Definition: hashjoin.h:101
TupleTableSlot * ExecProcNode(PlanState *node)
Definition: execProcnode.c:374
#define IsA(nodeptr, _type_)
Definition: nodes.h:542
#define HJ_SCAN_BUCKET
Definition: nodeHashjoin.c:32
TupleTableSlot * ExecInitExtraTupleSlot(EState *estate)
Definition: execTuples.c:855
HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
Definition: nodeHash.c:246
TupleTableSlot * ExecStoreMinimalTuple(MinimalTuple mtup, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:387
TupleTableSlot * hj_NullInnerTupleSlot
Definition: execnodes.h:1745
ProjectionInfo * ps_ProjInfo
Definition: execnodes.h:1059
int BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
Definition: buffile.c:478
PlanState ps
Definition: execnodes.h:1633
void ExecEndNode(PlanState *node)
Definition: execProcnode.c:614
bool ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:1146
MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot)
Definition: execTuples.c:655
Definition: plannodes.h:96
List * hashclauses
Definition: plannodes.h:655
List * args
Definition: execnodes.h:677
void ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
Definition: nodeHash.c:1122
ExprContext * ps_ExprContext
Definition: execnodes.h:1058
void ExecHashTableReset(HashJoinTable hashtable)
Definition: nodeHash.c:1214
HashJoinTable hashtable
Definition: execnodes.h:1972
void ExecReScan(PlanState *node)
Definition: execAmi.c:73
TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: execTuples.c:442
List * qual
Definition: execnodes.h:1042
bool hj_MatchedOuter
Definition: execnodes.h:1748
List * targetlist
Definition: execnodes.h:1041
TupleTableSlot * ExecProject(ProjectionInfo *projInfo, ExprDoneCond *isDone)
Definition: execQual.c:5520
static TupleTableSlot * ExecHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue)
Definition: nodeHashjoin.c:647
EState * state
Definition: execnodes.h:1029
List * joinqual
Definition: execnodes.h:1635
TupleTableSlot * hj_OuterTupleSlot
Definition: execnodes.h:1742
struct PlanState * righttree
Definition: execnodes.h:1044
List * hj_OuterHashKeys
Definition: execnodes.h:1734
List * lappend_oid(List *list, Oid datum)
Definition: list.c:164
TupleTableSlot * hj_FirstOuterTupleSlot
Definition: execnodes.h:1746
void ExecFreeExprContext(PlanState *planstate)
Definition: execUtils.c:696
#define lsecond(l)
Definition: pg_list.h:114
Join join
Definition: plannodes.h:654
void BufFileClose(BufFile *file)
Definition: buffile.c:202
int ExecHashGetSkewBucket(HashJoinTable hashtable, uint32 hashvalue)
Definition: nodeHash.c:1449
void ExecAssignResultTypeFromTL(PlanState *planstate)
Definition: execUtils.c:435
struct PlanState * lefttree
Definition: execnodes.h:1043
void ExecEndHashJoin(HashJoinState *node)
Definition: nodeHashjoin.c:604
#define HJ_FILL_INNER(hjstate)
Definition: nodeHashjoin.c:40
int * skewBucketNums
Definition: hashjoin.h:146
void ExecHashTableInsert(HashJoinTable hashtable, TupleTableSlot *slot, uint32 hashvalue)
Definition: nodeHash.c:834
void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno)
Definition: nodeHash.c:1032
JoinType jointype
Definition: plannodes.h:598
uint32 hj_CurHashValue
Definition: execnodes.h:1738
int hj_CurSkewBucketNo
Definition: execnodes.h:1740
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:1057
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execQual.c:4452
TupleTableSlot * ExecInitNullTupleSlot(EState *estate, TupleDesc tupType)
Definition: execTuples.c:869
#define linitial(l)
Definition: pg_list.h:110
#define ERROR
Definition: elog.h:43
TupleTableSlot * hj_NullOuterTupleSlot
Definition: execnodes.h:1744
Expr * expr
Definition: execnodes.h:578
BufFile * BufFileCreateTemp(bool interXact)
Definition: buffile.c:166
void ExecInitResultTupleSlot(EState *estate, PlanState *planstate)
Definition: execTuples.c:835
#define EXEC_FLAG_BACKWARD
Definition: executor.h:59
BufFile ** outerBatchFile
Definition: hashjoin.h:167
#define outerPlanState(node)
Definition: execnodes.h:1072
#define innerPlan(node)
Definition: plannodes.h:150
Cost startup_cost
Definition: plannodes.h:103
void ExecAssignProjectionInfo(PlanState *planstate, TupleDesc inputDesc)
Definition: execUtils.c:668
HashJoinTuple hj_CurTuple
Definition: execnodes.h:1741
MinimalTupleData * MinimalTuple
Definition: htup.h:27
bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext)
Definition: nodeHash.c:1064
int errcode_for_file_access(void)
Definition: elog.c:598
TupleTableSlot * ecxt_innertuple
Definition: execnodes.h:123
bool ExecQual(List *qual, ExprContext *econtext, bool resultForNull)
Definition: execQual.c:5246
#define TupIsNull(slot)
Definition: tuptable.h:138
unsigned int uint32
Definition: c.h:265
PlanState ps
Definition: execnodes.h:1971
#define InstrCountFiltered1(node, delta)
Definition: execnodes.h:1075
#define ereport(elevel, rest)
Definition: elog.h:122
List * hj_HashOperators
Definition: execnodes.h:1736
Bitmapset * chgParam
Definition: execnodes.h:1052
#define outerPlan(node)
Definition: plannodes.h:151
List * lappend(List *list, void *datum)
Definition: list.c:128
int hj_CurBucketNo
Definition: execnodes.h:1739
#define HJ_FILL_INNER_TUPLES
Definition: nodeHashjoin.c:34
#define HJ_FILL_OUTER(hjstate)
Definition: nodeHashjoin.c:38
ExprDoneCond
Definition: execnodes.h:159
HashSkewBucket ** skewBucket
Definition: hashjoin.h:143
HashJoinState * ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
Definition: nodeHashjoin.c:435
void ExecSetSlotDescriptor(TupleTableSlot *slot, TupleDesc tupdesc)
Definition: execTuples.c:250
Plan * plan
Definition: execnodes.h:1027
double totalTuples
Definition: hashjoin.h:156
#define HJTUPLE_MINTUPLE(hjtup)
Definition: hashjoin.h:72
#define makeNode(_type_)
Definition: nodes.h:539
TupleTableSlot * ecxt_outertuple
Definition: execnodes.h:124
bool hj_OuterNotEmpty
Definition: execnodes.h:1749
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:667
#define lfirst(lc)
Definition: pg_list.h:106
#define EXEC_FLAG_MARK
Definition: executor.h:60
#define InstrCountFiltered2(node, delta)
Definition: execnodes.h:1080
void ExecAssignExprContext(EState *estate, PlanState *planstate)
Definition: execUtils.c:413
BufFile ** innerBatchFile
Definition: hashjoin.h:166
static TupleTableSlot * ExecHashJoinGetSavedTuple(HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot)
Definition: nodeHashjoin.c:905
ExprState xprstate
Definition: execnodes.h:676
static void header(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:205
TupleDesc ExecGetResultType(PlanState *planstate)
Definition: execUtils.c:464
#define HJ_NEED_NEW_OUTER
Definition: nodeHashjoin.c:31
List * targetlist
Definition: plannodes.h:121
List * hashclauses
Definition: execnodes.h:1733
TupleTableSlot * ExecHashJoin(HashJoinState *node)
Definition: nodeHashjoin.c:62
TupleTableSlot * hj_HashTupleSlot
Definition: execnodes.h:1743
List * hj_InnerHashKeys
Definition: execnodes.h:1735
#define HJ_BUILD_HASHTABLE
Definition: nodeHashjoin.c:30
void * palloc(Size size)
Definition: mcxt.c:894
HashJoinTable hj_HashTable
Definition: execnodes.h:1737
int errmsg(const char *fmt,...)
Definition: elog.c:797
Node * MultiExecProcNode(PlanState *node)
Definition: execProcnode.c:561
size_t BufFileRead(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:358
Cost total_cost
Definition: plannodes.h:104
#define HeapTupleHeaderSetMatch(tup)
Definition: htup_details.h:497
size_t BufFileWrite(BufFile *file, void *ptr, size_t size)
Definition: buffile.c:405
void ExecHashTableResetMatchFlags(HashJoinTable hashtable)
Definition: nodeHash.c:1243
bool ExecHashGetHashValue(HashJoinTable hashtable, ExprContext *econtext, List *hashkeys, bool outer_tuple, bool keep_nulls, uint32 *hashvalue)
Definition: nodeHash.c:928
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:97
Oid opno
Definition: primnodes.h:471
#define HJ_FILL_OUTER_TUPLE
Definition: nodeHashjoin.c:33
#define elog
Definition: elog.h:218
#define innerPlanState(node)
Definition: execnodes.h:1071
PlanState * ExecInitNode(Plan *node, EState *estate, int eflags)
Definition: execProcnode.c:136
bool ps_TupFromTlist
Definition: execnodes.h:1060
Definition: pg_list.h:45
JoinState js
Definition: execnodes.h:1732
List * joinqual
Definition: plannodes.h:599
static bool ExecHashJoinNewBatch(HashJoinState *hjstate)
Definition: nodeHashjoin.c:724
void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr)
Definition: nodeHashjoin.c:871
void ExecHashTableDestroy(HashJoinTable hashtable)
Definition: nodeHash.c:567
#define ResetExprContext(econtext)
Definition: executor.h:316
Plan plan
Definition: plannodes.h:597
void ExecReScanHashJoin(HashJoinState *node)
Definition: nodeHashjoin.c:944