page

Each page has a certain inner layout that usually consists of the following parts:
page header

  • page header
  • an array of item pointers
  • free space
  • items (row versions)
  • special space

Page结构示意图:

page.png

pageinspect extension

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
test=# CREATE EXTENSION pageinspect;
CREATE EXTENSION

test=# select * from accounts;
id | client | amount
----+---------+--------
2 | bob | 100.00
3 | bob | 900.00
1 | alice | 800.00
4 | charlie | 100.00
(4 rows)

test=# SELECT lower, upper, special, pagesize FROM page_header(get_raw_page('accounts',0));
lower | upper | special | pagesize
-------+-------+---------+----------
40 | 8032 | 8192 | 8192
(1 row)

test=# select * FROM heap_page_items(get_raw_page('accounts',0));
lp | lp_off | lp_flags | lp_len | t_xmin | t_xmax | t_field3 | t_ctid | t_infomask2 | t_infomask | t_hoff | t_bits | t_oid | t_data
----+--------+----------+--------+--------+--------+----------+--------+-------------+------------+--------+--------+-------+--------------------------------------
1 | 8152 | 1 | 39 | 757 | 758 | 0 | (0,4) | 16387 | 1282 | 24 | | | \x010000000d616c6963650b0081e803
2 | 8112 | 1 | 37 | 757 | 802 | 0 | (0,5) | 16387 | 2306 | 24 | | | \x0200000009626f620b00816400
3 | 8072 | 1 | 37 | 757 | 802 | 0 | (0,6) | 16387 | 2306 | 24 | | | \x0300000009626f620b00818403
4 | 8032 | 1 | 39 | 758 | 802 | 0 | (0,7) | 49155 | 10498 | 24 | | | \x010000000d616c6963650b00812003
5 | 7992 | 1 | 37 | 802 | 0 | 0 | (0,5) | 32771 | 10754 | 24 | | | \x0200000009626f620b00816300
6 | 7952 | 1 | 37 | 802 | 0 | 0 | (0,6) | 32771 | 10754 | 24 | | | \x0300000009626f620b00818303
7 | 7912 | 1 | 39 | 802 | 0 | 0 | (0,7) | 32771 | 10754 | 24 | | | \x010000000d616c6963650b00811f03
8 | 7864 | 1 | 41 | 809 | 0 | 0 | (0,8) | 3 | 2562 | 24 | | | \x0300000011636861726c69650b00816400
9 | 7816 | 1 | 41 | 811 | 0 | 0 | (0,9) | 3 | 2050 | 24 | | | \x0400000011636861726c69650b00816400
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/*
* disk page organization
*
* space management information generic to any page
*
* pd_lsn - identifies xlog record for last change to this page.
* pd_checksum - page checksum, if set.
* pd_flags - flag bits.
* pd_lower - offset to start of free space.
* pd_upper - offset to end of free space.
* pd_special - offset to start of special space.
* pd_pagesize_version - size in bytes and page layout version number.
* pd_prune_xid - oldest XID among potentially prunable tuples on page.
*
* The LSN is used by the buffer manager to enforce the basic rule of WAL:
* "thou shalt write xlog before data". A dirty buffer cannot be dumped
* to disk until xlog has been flushed at least as far as the page's LSN.
*
* pd_checksum stores the page checksum, if it has been set for this page;
* zero is a valid value for a checksum. If a checksum is not in use then
* we leave the field unset. This will typically mean the field is zero
* though non-zero values may also be present if databases have been
* pg_upgraded from releases prior to 9.3, when the same byte offset was
* used to store the current timelineid when the page was last updated.
* Note that there is no indication on a page as to whether the checksum
* is valid or not, a deliberate design choice which avoids the problem
* of relying on the page contents to decide whether to verify it. Hence
* there are no flag bits relating to checksums.
*
* pd_prune_xid is a hint field that helps determine whether pruning will be
* useful. It is currently unused in index pages.
*
* The page version number and page size are packed together into a single
* uint16 field. This is for historical reasons: before PostgreSQL 7.3,
* there was no concept of a page version number, and doing it this way
* lets us pretend that pre-7.3 databases have page version number zero.
* We constrain page sizes to be multiples of 256, leaving the low eight
* bits available for a version number.
*
* Minimum possible page size is perhaps 64B to fit page header, opaque space
* and a minimal tuple; of course, in reality you want it much bigger, so
* the constraint on pagesize mod 256 is not an important restriction.
* On the high end, we can only support pages up to 32KB because lp_off/lp_len
* are 15 bits.
*/

typedef struct PageHeaderData
{
/* XXX LSN is member of *any* block, not only page-organized ones */
PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
* record for last change to this page */
uint16 pd_checksum; /* checksum */
uint16 pd_flags; /* flag bits, see below */
LocationIndex pd_lower; /* offset to start of free space */
LocationIndex pd_upper; /* offset to end of free space */
LocationIndex pd_special; /* offset to start of special space */
uint16 pd_pagesize_version;
TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
} PageHeaderData;

typedef PageHeaderData *PageHeader;

Record

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
test=# drop table t;
DROP TABLE
test=# CREATE TABLE t(
test(# id integer GENERATED ALWAYS AS IDENTITY, s text
test(# );
CREATE TABLE
test=# CREATE INDEX ON t(s);
CREATE INDEX
test=# begin;
BEGIN
test=*# INSERT INTO t(s) VALUES ('FOO');
INSERT 0 1
test=*# SELECT pg_current_xact_id();
pg_current_xact_id
--------------------
766
(1 row)

test=*# select * from heap_page_items(get_raw_page('t',0))\gx
-[ RECORD 1 ]-------------------
lp | 1
lp_off | 8160
lp_flags | 1
lp_len | 32
t_xmin | 766
t_xmax | 0
t_field3 | 0
t_ctid | (0,1)
t_infomask2 | 2
t_infomask | 2050
t_hoff | 24
t_bits |
t_oid |
t_data | \x0100000009464f4f

test=# SELECT '(0,'||lp||')' AS ctid,
test-# CASE lp_flags
test-# WHEN 0 THEN 'unused'
test-# WHEN 1 THEN 'normal'
test-# WHEN 2 THEN 'redirect to '||lp_off
test-# WHEN 3 THEN 'dead'
test-# END AS state,
test-# t_xmin as xmin,
test-# t_xmax as xmax,
test-# (t_infomask & 256) > 0 AS xmin_committed,
test-# (t_infomask & 512) > 0 AS xmin_aborted,
test-# (t_infomask & 1024) > 0 AS xmax_committed,
test-# (t_infomask & 2048) > 0 AS xmax_aborted
test-# FROM heap_page_items(get_raw_page('t',0));
ctid | state | xmin | xmax | xmin_committed | xmin_aborted | xmax_committed | xmax_aborted
-------+--------+------+------+----------------+--------------+----------------+--------------
(0,1) | normal | 766 | 0 | f | f | f | t
(1 row)

Reference:

【转】AntDB/PostgreSQL内部原理:表Page结构解析_postgresql对应antdb的版本-CSDN博客