PHP小顶堆实现TopK

从arr[1, n]数组中,找出最大的k个数;

思路

时间复杂度:O(n*lg(k))

  • 先根据k个元素生成小顶堆,这个小顶堆用来存储当前topk个元素;
  • 再从第k+1个元素开始扫描,和堆顶最小值比较,如果新增元素大于堆顶,则替换堆顶元素,并重新调整堆,以保证堆内k个元素是最大的元素;
  • 直到扫描到最后一个元素,最终堆里的元素即是topk了;
如图所示:

PHP小顶堆实现TopK

PHP代码实现:

/**
 * Tracks the k largest distinct values fed to it one at a time.
 *
 * Every value accepted by adjust() is recorded in $topkArr; a value that
 * loosely equals an already recorded one is ignored. $topkList holds at most
 * $top values and is kept sorted in ascending order, so $topkList[0] is
 * always the smallest retained value, i.e. the one a larger newcomer replaces.
 *
 * Note: the retained list is fully re-sorted with heapSort() after every
 * change, so one adjust() call costs O(k log k) for the sort plus a linear
 * duplicate scan over the recorded values.
 */
class Topk
{
    /** Maximum number of values to retain (k). */
    public $top;

    /** Every distinct value passed to adjust(), in arrival order. */
    public $topkArr = array();

    /** The retained values, sorted ascending; index 0 is the smallest. */
    public $topkList = array();

    /**
     * @param int $topk How many of the largest values to keep.
     */
    public function __construct($topk)
    {
        $this->top = $topk;
    }

    /**
     * Exchanges the elements at positions $i and $j of $arr in place.
     *
     * @param array $arr Array to modify (by reference).
     * @param int   $i   First index.
     * @param int   $j   Second index.
     */
    public function swap(&$arr, $i, $j)
    {
        $temp = $arr[$i];
        $arr[$i] = $arr[$j];
        $arr[$j] = $temp;
    }

    /**
     * Sifts the element at index $i down until the subtree rooted there
     * satisfies the max-heap property (parent >= both children).
     *
     * @param array $tree Array-backed binary tree (by reference).
     * @param int   $n    Number of leading elements that belong to the heap.
     * @param int   $i    Index of the node to sift down.
     */
    private function heapify(&$tree, $n, $i)
    {
        while ($i < $n) {
            $left = (2 * $i) + 1;
            $right = (2 * $i) + 2;
            $largest = $i;

            // Pick the largest of the parent and its (existing) children.
            if ($left < $n && $tree[$left] > $tree[$largest]) {
                $largest = $left;
            }
            if ($right < $n && $tree[$right] > $tree[$largest]) {
                $largest = $right;
            }

            // The parent already dominates: nothing left to fix below it.
            if ($largest === $i) {
                return;
            }

            $this->swap($tree, $largest, $i);
            // Continue one level down, where the swap may have broken the heap.
            $i = $largest;
        }
    }

    /**
     * Turns the first $n elements of $tree into a max-heap, bottom-up.
     *
     * @param array $tree Array to heapify (by reference).
     * @param int   $n    Number of leading elements to include.
     */
    public function buildHeap(&$tree, $n)
    {
        // Index of the last node that has a child. The result must be an
        // integer: a fractional index would make heapify() compute the wrong
        // child positions. Yields -1 (loop skipped) for $n of 0 or 1.
        $lastParent = (int) floor($n / 2) - 1;
        for ($i = $lastParent; $i >= 0; $i--) {
            $this->heapify($tree, $n, $i);
        }
    }

    /**
     * Sorts the first $n elements of $tree into ascending order in place.
     *
     * @param array $tree Array to sort (by reference).
     * @param int   $n    Number of leading elements to sort.
     */
    public function heapSort(&$tree, $n)
    {
        $this->buildHeap($tree, $n);
        for ($i = $n - 1; $i > 0; $i--) {
            // Move the current maximum behind the shrinking heap ...
            $this->swap($tree, $i, 0);
            // ... and restore the heap over the remaining $i elements.
            $this->heapify($tree, $i, 0);
        }
    }

    /**
     * Offers one value to the tracker.
     *
     * The value is ignored if it (loosely) equals a value seen before.
     * Otherwise it is recorded, and it enters the retained list when the list
     * is not yet full or when it is larger than the smallest retained value.
     *
     * @param mixed $value Value to offer; compared with PHP's <, > and ==.
     */
    public function adjust($value)
    {
        if (in_array($value, $this->topkArr)) {
            return;
        }

        // Record the raw input.
        $this->init($value);

        // Nothing can be retained when k is not positive.
        if ($this->top <= 0) {
            return;
        }

        if (count($this->topkList) < $this->top) {
            // Still filling up: every new value is kept.
            $this->topkList[] = $value;
        } elseif ($this->topkList[0] < $value) {
            // Full: the newcomer evicts the smallest retained value.
            $this->topkList[0] = $value;
        } else {
            return;
        }

        // Re-sort using the length AFTER the change so the new value is
        // included and index 0 really holds the minimum.
        $this->heapSort($this->topkList, count($this->topkList));
    }

    /**
     * @return array The retained values in ascending order.
     */
    public function getTopK()
    {
        return $this->topkList;
    }

    /**
     * Appends $value to the record of raw inputs.
     *
     * @param mixed $value Value to record.
     */
    public function init($value)
    {
        array_push($this->topkArr, $value);
    }

    /**
     * @return array Every distinct value offered so far, in arrival order.
     */
    public function getInitData()
    {
        return $this->topkArr;
    }

    /**
     * Demo driver: offers $count random integers from [$min, $max].
     *
     * Because duplicates are ignored by adjust(), fewer than $count values
     * may end up recorded.
     *
     * @param int $count How many random values to generate.
     * @param int $min   Lower bound passed to mt_rand().
     * @param int $max   Upper bound passed to mt_rand().
     */
    public function calc($count = 10, $min = 1, $max = 100)
    {
        for ($i = 0; $i < $count; $i++) {
            $this->adjust(mt_rand($min, $max));
        }
    }
}
// Demo run: keep the 5 largest of the random values generated by calc(),
// then dump the recorded inputs followed by the retained top-k list.
$topFive = new Topk(5);
$topFive->calc();
$recordedInputs = $topFive->getInitData();
$largestValues = $topFive->getTopK();
var_dump($recordedInputs, $largestValues);

运行结果(输入为随机数,每次运行的结果不同):

原始数据:
array(10) {
[0] =>
int(12)
[1] =>
int(67)
[2] =>
int(58)
[3] =>
int(92)
[4] =>
int(32)
[5] =>
int(48)
[6] =>
int(90)
[7] =>
int(76)
[8] =>
int(35)
[9] =>
int(13)
}
堆化后数据:
array(5) {
[0] =>
int(58)
[1] =>
int(67)
[2] =>
int(76)
[3] =>
int(90)
[4] =>
int(92)
}
-------------本文结束感谢您的阅读-------------
坚持原创技术分享,您的支持将鼓励我继续创作!