Skip to content

Commit e95618e

Browse files
committed
Add new topo sorting query
1 parent d197de6 commit e95618e

File tree

1 file changed

+207
-79
lines changed

1 file changed

+207
-79
lines changed

Diff for: packages/playground/data-liberation/src/import/WP_Topological_Sorter.php

+207-79
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,24 @@
99
*/
1010
class WP_Topological_Sorter {
1111

12-
public $posts = array();
13-
public $categories = array();
12+
/**
13+
* The base name of the table.
14+
*/
15+
const TABLE_NAME = 'data_liberation_index';
16+
17+
/**
18+
* The option name for the database version.
19+
*/
20+
const OPTION_NAME = 'data_liberation_db_version';
21+
22+
/**
23+
* The current database version, to be used with dbDelta.
24+
*/
25+
const DB_VERSION = 1;
26+
27+
// Element types.
28+
const ELEMENT_TYPE_POST = 1;
29+
const ELEMENT_TYPE_CATEGORY = 2;
1430

1531
/**
1632
* Counter for orphaned posts/attachments; its value is also assigned as a temporary post ID.
@@ -34,27 +50,135 @@ class WP_Topological_Sorter {
3450
*/
3551
protected $sorted = false;
3652

53+
/**
 * Build the full name of the index table for the current site.
 *
 * @return string The site's table prefix followed by self::TABLE_NAME.
 */
public static function get_table_name() {
	global $wpdb;

	// With the default prefix this yields wp_{TABLE_NAME}.
	$prefix = $wpdb->prefix;

	return $prefix . self::TABLE_NAME;
}
59+
60+
/**
61+
* Run by register_activation_hook.
62+
*/
63+
public static function activate() {
	global $wpdb;

	$table_name = self::get_table_name();

	// Create the table if it doesn't exist.
	// @TODO: remove this custom SQLite declaration after first phase of unit tests is done.
	if ( self::is_sqlite() ) {
		// Uniqueness is scoped by element type: a post and a category may
		// legitimately share the same numeric ID.
		$sql = $wpdb->prepare(
			'CREATE TABLE IF NOT EXISTS %i (
				id INTEGER PRIMARY KEY AUTOINCREMENT,
				element_type INTEGER NOT NULL default %d,
				element_id INTEGER NOT NULL,
				parent_id INTEGER,
				parent TEXT NOT NULL default "",
				byte_offset INTEGER NOT NULL,
				hierarchy_level INTEGER DEFAULT NULL
			);

			CREATE UNIQUE INDEX IF NOT EXISTS idx_element_id ON %i (element_type, element_id);
			CREATE INDEX IF NOT EXISTS idx_element_parent ON %i (parent);
			CREATE INDEX IF NOT EXISTS idx_byte_offset ON %i (byte_offset);',
			$table_name,
			self::ELEMENT_TYPE_POST,
			$table_name,
			$table_name,
			$table_name
		);
	} else {
		// MySQL, MariaDB.
		// See wp_get_db_schema: index prefix length used for string columns.
		$max_index_length = 191;

		// Notes on the definition below:
		// - "unsigned" must follow the integer type.
		// - Key definitions are comma separated.
		// - A prefix length is only valid on string columns, so it is applied
		// to "parent" only and not to the integer columns.
		// - The unique key is scoped by element type, since posts and
		// categories are looked up by (element_id, element_type).
		$sql = $wpdb->prepare(
			'CREATE TABLE IF NOT EXISTS %i (
				id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
				element_type tinyint(1) NOT NULL default %d,
				element_id bigint(20) unsigned NOT NULL,
				parent_id bigint(20) unsigned DEFAULT NULL,
				parent varchar(200) NOT NULL default "",
				byte_offset bigint(20) unsigned NOT NULL,
				hierarchy_level INT DEFAULT NULL,
				PRIMARY KEY  (id),
				UNIQUE KEY element_id (element_type, element_id),
				KEY element_parent (parent(%d)),
				KEY byte_offset (byte_offset)
			) ' . $wpdb->get_charset_collate(),
			$table_name,
			self::ELEMENT_TYPE_POST,
			$max_index_length
		);
	}

	require_once ABSPATH . 'wp-admin/includes/upgrade.php';
	dbDelta( $sql );

	// Remember which schema version is installed so load() can detect upgrades.
	update_option( self::OPTION_NAME, self::DB_VERSION );
}
122+
123+
/**
 * Tell whether the site runs on the SQLite database engine.
 *
 * @return bool True only when the DB_ENGINE constant is defined and equals 'sqlite'.
 */
public static function is_sqlite() {
	// Both conditions must hold: with "||" any defined engine was reported as
	// SQLite, and an undefined DB_ENGINE constant was still dereferenced.
	return defined( 'DB_ENGINE' ) && 'sqlite' === DB_ENGINE;
}
126+
127+
/**
128+
* Run in the 'plugins_loaded' action.
129+
*/
130+
public static function load() {
	// The version is written with update_option() and removed with
	// delete_option(), so it must be read per-site as well; on multisite
	// get_site_option() reads the network-wide value instead.
	$installed_version = (int) get_option( self::OPTION_NAME );

	if ( self::DB_VERSION !== $installed_version ) {
		// Used to update the database with dbDelta, if needed in the future.
		self::activate();
	}
}
136+
137+
/**
138+
* Run by register_deactivation_hook.
139+
*/
140+
public static function deactivate() {
	global $wpdb;
	$table_name = self::get_table_name();

	// Drop the table. The name is an identifier, so it needs the %i
	// placeholder; %s would quote it as a string literal and produce invalid SQL.
	$wpdb->query( $wpdb->prepare( 'DROP TABLE IF EXISTS %i', $table_name ) );

	// Delete the option.
	delete_option( self::OPTION_NAME );
}
150+
151+
/**
152+
* Reset the sorter's in-memory state (orphan counter, last post ID and sorted flag).
153+
*/
37154
public function reset() {
	// Mark the index as not sorted yet.
	$this->sorted = false;

	// Restart both post counters from zero.
	$this->last_post_id        = 0;
	$this->orphan_post_counter = 0;
}
45159

46160
/**
 * Index a category in the database table.
 *
 * @param int   $byte_offset The byte offset of the category in the source stream.
 * @param array $data        The category data. Reads the 'term_id' key and the
 *                           optional 'parent_id' and 'parent' keys.
 * @return false|void False when $data is empty.
 */
public function map_category( $byte_offset, $data ) {
	global $wpdb;

	if ( empty( $data ) ) {
		return false;
	}

	// Both parent references are optional; guard them the same way so a
	// category without a parent is stored with a NULL parent_id instead of
	// raising an undefined-key warning.
	$parent_id   = array_key_exists( 'parent_id', $data ) ? $data['parent_id'] : null;
	$parent_slug = array_key_exists( 'parent', $data ) ? $data['parent'] : '';

	$wpdb->insert(
		self::get_table_name(),
		array(
			'element_type' => self::ELEMENT_TYPE_CATEGORY,
			'element_id'   => $data['term_id'],
			'parent_id'    => $parent_id,
			'parent'       => $parent_slug,
			'byte_offset'  => $byte_offset,
		)
	);
}
56178

57179
public function map_post( $byte_offset, $data ) {
180+
global $wpdb;
181+
58182
if ( empty( $data ) ) {
59183
return false;
60184
}
@@ -70,11 +194,15 @@ public function map_post( $byte_offset, $data ) {
70194
--$this->orphan_post_counter;
71195
}
72196

73-
// This is an array saved as: [ parent, byte_offset ], to save
74-
// space and not using an associative one.
75-
$this->posts[ $data['post_id'] ] = array(
76-
$data['post_parent'],
77-
$byte_offset,
197+
$wpdb->insert(
198+
self::get_table_name(),
199+
array(
200+
'element_type' => self::ELEMENT_TYPE_POST,
201+
'element_id' => $data['post_id'],
202+
'parent_id' => $data['post_parent'],
203+
'parent' => '',
204+
'byte_offset' => $byte_offset,
205+
)
78206
);
79207
}
80208

@@ -89,25 +217,20 @@ public function map_post( $byte_offset, $data ) {
89217
* @return int|bool The byte offset of the post, or false if the post is not found.
90218
*/
91219
public function get_post_byte_offset( $id ) {
	global $wpdb;

	// Offsets are only handed out once the elements have been sorted.
	if ( ! $this->sorted ) {
		return false;
	}

	// The table name is an identifier and must use %i; %s would quote it as
	// a string literal and break the query.
	$byte_offset = $wpdb->get_var(
		$wpdb->prepare(
			'SELECT byte_offset FROM %i WHERE element_id = %d AND element_type = %d',
			self::get_table_name(),
			$id,
			self::ELEMENT_TYPE_POST
		)
	);

	// get_var() returns null when no row matches; the documented contract is
	// "int offset, or false when not found".
	if ( null === $byte_offset ) {
		return false;
	}

	return (int) $byte_offset;
}
112235

113236
/**
@@ -118,25 +241,20 @@ public function get_post_byte_offset( $id ) {
118241
* @return int|bool The byte offset of the category, or false if the category is not found.
119242
*/
120243
public function get_category_byte_offset( $slug ) {
	global $wpdb;

	// Offsets are only handed out once the elements have been sorted.
	if ( ! $this->sorted ) {
		return false;
	}

	// NOTE(review): the index table has no slug column; map_category() stores
	// the term ID in element_id. The lookup therefore matches $slug against
	// element_id as an integer — confirm whether callers pass a term ID here
	// or whether a slug column should be added to the table.
	$byte_offset = $wpdb->get_var(
		$wpdb->prepare(
			// %i (identifier) is required for the table name; %s would quote it as a string.
			'SELECT byte_offset FROM %i WHERE element_id = %d AND element_type = %d',
			self::get_table_name(),
			$slug,
			self::ELEMENT_TYPE_CATEGORY
		)
	);

	// get_var() returns null when no row matches; the documented contract is
	// "int offset, or false when not found".
	if ( null === $byte_offset ) {
		return false;
	}

	return (int) $byte_offset;
}
141259

142260
public function is_sorted() {
@@ -150,30 +268,30 @@ public function is_sorted() {
150268
* This method sorts the elements in the order they should be processed.
151269
*/
152270
public function sort_topologically( $free_space = true ) {
153-
foreach ( $this->categories as $slug => $category ) {
154-
$this->topological_category_sort( $slug, $category );
155-
}
271+
/*foreach ( $this->categories as $slug => $category ) {
272+
// $this->topological_category_sort( $slug, $category );
273+
}*/
156274

157-
$this->sort_elements( $this->posts );
158-
$this->sort_elements( $this->categories );
275+
$this->sort_elements( self::ELEMENT_TYPE_POST );
276+
$this->sort_elements( self::ELEMENT_TYPE_CATEGORY );
159277

160278
// Free some space.
161279
if ( $free_space ) {
162-
/**
280+
/*
163281
* @TODO: all the elements that have not been moved can be flushed away.
164-
*/
282+
*
165283
foreach ( $this->posts as $id => $element ) {
166284
// Save only the byte offset.
167285
$this->posts[ $id ] = $element[1];
168286
}
169287
170-
/**
288+
/*
171289
* @TODO: all the elements that have not been moved can be flushed away.
172-
*/
290+
*
173291
foreach ( $this->categories as $slug => $element ) {
174292
// Save only the byte offset.
175293
$this->categories[ $slug ] = $element[1];
176-
}
294+
}*/
177295
}
178296

179297
$this->sorted = true;
@@ -182,34 +300,44 @@ public function sort_topologically( $free_space = true ) {
182300
/**
183301
* Recursive sort elements. Posts with parents will be moved to the correct position.
184302
*
303+
* @param int $type The type of element to sort.
185304
* @return true
186305
*/
187-
private function sort_elements( &$elements ) {
188-
$sort_callback = function ( $a, $b ) use ( &$elements ) {
189-
$parent_a = $elements[ $a ][0];
190-
$parent_b = $elements[ $b ][0];
191-
192-
if ( ! $parent_a && ! $parent_b ) {
193-
// No parents.
194-
return 0;
195-
} elseif ( $a === $parent_b ) {
196-
// A is the parent of B.
197-
return -1;
198-
} elseif ( $b === $parent_a ) {
199-
// B is the parent of A.
200-
return 1;
201-
}
202-
203-
return 0;
204-
};
205-
206-
/**
207-
* @TODO: PHP uses quicksort: https://github.com/php/php-src/blob/master/Zend/zend_sort.c
208-
* WordPress export posts by ID and so are likely to be already in order.
209-
* Quicksort performs badly on already sorted arrays, O(n^2) is the worst case.
210-
* Let's consider using a different sorting algorithm.
211-
*/
212-
uksort( $elements, $sort_callback );
306+
private function sort_elements( $type ) {
	global $wpdb;
	$table_name = self::get_table_name();

	// The recursive CTE walks the hierarchy from the roots down and records
	// the depth of every element, which is then written to hierarchy_level.
	//
	// - The JOIN must come before the WHERE clause in the recursive member.
	// - parent_id holds the parent's element_id (see the inserts in
	// map_post/map_category), not the auto-increment row id, so children
	// are joined on hc.element_id.
	//
	// NOTE(review): "UPDATE ... FROM" is SQLite/PostgreSQL syntax; confirm the
	// statement form accepted by the MySQL/MariaDB versions this must support.
	return $wpdb->query(
		$wpdb->prepare(
			'WITH RECURSIVE hierarchy_cte AS (
				-- Select all root nodes (where parent_id is NULL)
				SELECT id, element_id, parent_id, 1 AS hierarchy_level
				FROM %i
				WHERE parent_id IS NULL AND element_type = %d

				UNION ALL

				-- Recursive member: Join the CTE with the table to find children
				SELECT yt.id, yt.element_id, yt.parent_id, hc.hierarchy_level + 1
				FROM %i yt
				INNER JOIN hierarchy_cte hc ON yt.parent_id = hc.element_id
				WHERE yt.element_type = %d
			)

			-- Update the hierarchy_level based on the computed hierarchy_level
			UPDATE %i
			SET hierarchy_level = hc.hierarchy_level
			FROM hierarchy_cte hc
			WHERE %i.id = hc.id;',
			$table_name,
			$type,
			$table_name,
			$type,
			$table_name,
			$table_name
		)
	);
}
214342

215343
/**

0 commit comments

Comments
 (0)