Skip to content

Commit 50af454

Browse files
committed
Clustering examples
1 parent 7d5c98e commit 50af454

File tree

6 files changed

+8632
-4
lines changed

6 files changed

+8632
-4
lines changed

Diff for: .gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ humbuglog.*
99
/data/t10k-labels-idx1-ubyte
1010
/data/stackoverflow_*.csv
1111
/data/crimes*.csv
12+
/data/bbc

Diff for: clustering/robberySquaredErrors.php

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpmlExamples;
6+
7+
use Phpml\Clustering\KMeans;
8+
use Phpml\Math\Distance\Euclidean;
9+
10+
require 'vendor/autoload.php';
11+
12+
// Read the robbery data set. Line terminators are stripped and empty lines are
// skipped so that a blank line cannot turn into a bogus [0.0, 0.0] sample.
$lines = file(__DIR__.'/../data/crimes-robbery.csv', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if ($lines === false) {
    // file() signals failure by returning false; stop here instead of
    // continuing with a non-array value.
    throw new \RuntimeException('Unable to read '.__DIR__.'/../data/crimes-robbery.csv');
}

// Convert every ';'-separated record into a two-element float sample built from
// its first two fields. array_map() replaces the former by-reference foreach,
// which left $line bound to the last element of $lines after the loop.
$lines = array_map(
    static function (string $line): array {
        $row = explode(';', $line);

        return [(float) $row[0], (float) $row[1]];
    },
    $lines
);
17+
18+
/**
 * Adds up Euclidean::sqDistance() between one center and each given point.
 *
 * @param array $center coordinates handed to sqDistance() as the first argument
 * @param array $points points that are each compared against $center
 *
 * @return float the accumulated total; 0 when $points is empty
 */
function squaredDistances(array $center, array $points): float
{
    $euclidean = new Euclidean();

    // Fold the points into a running total, starting from 0.
    return array_reduce(
        $points,
        static function ($total, $member) use ($center, $euclidean) {
            return $total + $euclidean->sqDistance($center, $member);
        },
        0
    );
}
28+
29+
30+
// Print the sum of squared errors (SSE) for every cluster count from 1 to 20.
foreach (range(1, 20) as $k) {
    $kmeans = new KMeans($k);
    $groups = $kmeans->cluster($lines);

    // For each center, add the squared distances to the samples of the
    // cluster stored under the same key.
    $sse = 0;
    foreach ($kmeans->centronoids() as $index => $center) {
        $sse += squaredDistances($center, $groups[$index]);
    }

    printf('SSE (k=%s): %s' . PHP_EOL, $k, $sse);
}

Diff for: clustering/synteticSquaredErrors.php

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpmlExamples;
6+
7+
use Phpml\Clustering\KMeans;
8+
use Phpml\Math\Distance\Euclidean;
9+
10+
require 'vendor/autoload.php';
11+
12+
// Read the synthetic data set. Line terminators are stripped and empty lines
// are skipped so that a blank line cannot turn into a bogus [0.0, 0.0] sample.
$lines = file(__DIR__.'/../data/syntetic-g.csv', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if ($lines === false) {
    // file() signals failure by returning false; stop here instead of
    // continuing with a non-array value.
    throw new \RuntimeException('Unable to read '.__DIR__.'/../data/syntetic-g.csv');
}

// Convert every ','-separated record into a two-element float sample built from
// its first two fields. array_map() replaces the former by-reference foreach,
// which left $line bound to the last element of $lines after the loop.
$lines = array_map(
    static function (string $line): array {
        $row = explode(',', $line);

        return [(float) $row[0], (float) $row[1]];
    },
    $lines
);
17+
18+
/**
 * Adds up Euclidean::sqDistance() between one center and each given point.
 *
 * The parameters are declared as arrays, matching the identical helper in
 * robberySquaredErrors.php, so a wrong argument fails at the call boundary
 * rather than inside the loop.
 *
 * @param array $center coordinates handed to sqDistance() as the first argument
 * @param array $points points that are each compared against $center
 *
 * @return float the accumulated total; 0.0 when $points is empty
 */
function squaredDistances(array $center, array $points): float
{
    // Start from a float so the accumulator already has the declared return type.
    $sum = 0.0;
    $metric = new Euclidean();
    foreach ($points as $point) {
        $sum += $metric->sqDistance($center, $point);
    }

    return $sum;
}
28+
29+
30+
// Print the sum of squared errors (SSE) for every cluster count from 1 to 20.
foreach (range(1, 20) as $k) {
    $kmeans = new KMeans($k);
    $groups = $kmeans->cluster($lines);

    // For each center, add the squared distances to the samples of the
    // cluster stored under the same key.
    $sse = 0;
    foreach ($kmeans->centronoids() as $index => $center) {
        $sse += squaredDistances($center, $groups[$index]);
    }

    printf('SSE (k=%s): %f' . PHP_EOL, $k, $sse);
}

0 commit comments

Comments
 (0)