One of the most important, common and awarded clustering algorithm is DBSCAN. It is a density-based process which clusters points that are packed together. Opposed to that, points present in low density zones are marked as outliers.
Since DBSCAN is a very interesting and important algorithm, we decided to publish our Java implementation of it. On the following code boxes, we provide the our complete Java code to program the DBSCAN process.
This is a simple implementation which can be optimized and you can use or modify as you wish.
Algorithm.java
1 2 3 4 5 6 7 8 9 10 11 12 13 |
package com.dataonfocus.clustering; import java.util.List; import com.dataonfocus.clustering.structures.DataPoint; public interface Algorithm { public void setPoints(List points); public void cluster(); } |
DataPoint.java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
package com.dataonfocus.clustering.structures; public interface DataPoint { public double distance(DataPoint datapoint); public void setCluster(int id); public int getCluster(); public int getX(); public int getY(); } |
Cluster.java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
package com.dataonfocus.clustering.structures; import java.util.ArrayList; import java.util.List; public class Cluster { public List points; public DataPoint centroid; public int id; public Cluster(int id) { this.id = id; this.points = new ArrayList(); this.centroid = null; } public List getPoints() { return points; } public void addPoint(DataPoint point) { points.add(point); point.setCluster(id); } public void setPoints(List points) { this.points = points; } public DataPoint getCentroid() { return centroid; } public void setCentroid(Point centroid) { this.centroid = centroid; } public int getId() { return id; } public void clear() { points.clear(); } public void plotCluster() { System.out.println("[Cluster: " + id+"]"); System.out.println("[Centroid: " + centroid + "]"); System.out.println("[Points: \n"); for(DataPoint p : points) { System.out.println(p); } System.out.println("]"); } } |
DBSCAN.java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
package com.dataonfocus.clustering.density; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import com.dataonfocus.clustering.Algorithm; import com.dataonfocus.clustering.structures.Cluster; import com.dataonfocus.clustering.structures.DataPoint; public class DBSCAN implements Algorithm{ public List points; private List clusters; public double max_distance; public int min_points; public boolean[] visited; public DBSCAN(double max_distance, int min_points) { this.points = new ArrayList(); this.clusters = new ArrayList(); this.max_distance = max_distance; this.min_points = min_points; } public void cluster() { Iterator it = points.iterator(); int n = 0; while(it.hasNext()) { if(!visited[n]) { DataPoint d = it.next(); visited[n] = true; List neighbors = getNeighbors(d); if(neighbors.size() >= min_points) { Cluster c = new Cluster(clusters.size()); buildCluster(d,c,neighbors); clusters.add(c); } } } } private void buildCluster(DataPoint d, Cluster c, List neighbors) { c.addPoint(d); for (Integer point : neighbors) { DataPoint p = points.get(point); if(!visited[point]) { visited[point] = true; List newNeighbors = getNeighbors(p); if(newNeighbors.size() >= min_points) { neighbors.addAll(newNeighbors); } } if(p.getCluster() == -1) { c.addPoint(p); } } } private List getNeighbors(DataPoint d) { List neighbors = new ArrayList(); int i = 0; for (DataPoint point : points) { double distance = d.distance(point); if(distance <= max_distance) { neighbors.add(i); } i++; } return neighbors; } public void setPoints(List points) { this.points = points; this.visited = new boolean[points.size()]; } } |
Feel free to use and adapt our solution to this problem. We will love to know what you are working on and how we helped you with this piece of code. So, if you want to make some suggestion or comment, just contact us!