PS

DECISION TREE

entropy-based ID3 classifier


Algorithm.ts
1// Algorithm.ts — ID3 Decision Tree
2
3function entropy(samples: DataPoint[]): number {
4 const counts = countByClass(samples);
5 const n = samples.length;
6 return -Object.values(counts).reduce(
7 (sum, c) => sum + (c/n) * Math.log2(c/n), 0
8 );
9}
10
11function bestSplit(samples, feature) {
12 const vals = unique(samples, feature).sort();
13 return vals.slice(0,-1).map((v,i) => {
14 const t = (v + vals[i+1]) / 2;
15 const left = samples.filter(s => s[feature] <= t);
16 const right = samples.filter(s => s[feature] > t);
17 const wH = (left.length/samples.length)*entropy(left)
18 + (right.length/samples.length)*entropy(right);
19 return { threshold: t, entropy: wH };
20 }).reduce((a,b) => b.entropy < a.entropy ? b : a);
21}
22
23function bestAttribute(samples) {
24 const H = entropy(samples);
25 return features.map(f => ({
26 feature: f, ...bestSplit(samples, f),
27 gain: H - bestSplit(samples, f).entropy
28 })).reduce((a,b) => b.gain > a.gain ? b : a);
29}
30
31function buildTree(samples, depth=0): TreeNode {
32 if (entropy(samples) < 0.1 || depth >= 5)
33 return leaf(majorityClass(samples));
34 const { feature, threshold } = bestAttribute(samples);
35 return {
36 feature, threshold,
37 left: buildTree(samples.filter(s=>s[feature]<=threshold), depth+1),
38 right: buildTree(samples.filter(s=>s[feature]> threshold), depth+1),
39 };
40}
tree — DecisionTree
scatter — feature space
dataset
speed
med