| 
    Algorithms_in_C
    1.0.0
    
   Set of algorithms implemented in C. 
   | 
 
 
 
 
◆ calculateCentroid()
Calculate the centroid and assign it to the cluster variable
- Parameters
 - 
  
    | observations | an array of observations whose centroid is calculated  | 
    | size | size of the observations array  | 
    | centroid | a reference to cluster object to store information of centroid  | 
  
   
  103     centroid->
count = size;
 
  104     for (; i < size; i++)
 
  106         centroid->
x += observations[i].
x;
 
  107         centroid->
y += observations[i].
y;
 
  108         observations[i].
group = 0;
 
  110     centroid->
x /= centroid->
count;
 
  111     centroid->
y /= centroid->
count;
 
 
 
 
◆ calculateNearst()
Returns the index of centroid nearest to given observation
- Parameters
 - 
  
    | o | observation  | 
    | clusters | array of clusters holding the centroid coordinates  | 
    | k | size of clusters array | 
  
   
- Returns
 - the index of nearest centroid for given observation 
 
   71     double minD = DBL_MAX;
 
   78         dist = (clusters[i].
x - o->
x) * (clusters[i].x - o->
x) +
 
   79                (clusters[i].
y - o->
y) * (clusters[i].y - o->
y);
 
 
 
 
◆ kMeans()
–K Means Algorithm–
- Assign each observation to one of k groups creating a random initial clustering
 
- Find the centroid of observations for each cluster to form new centroids
 
- For each observation, find the nearest centroid among the calculated centroids
 
- Assign the observation to its nearest centroid to create a new clustering.
 
- Repeat steps 2, 3 and 4 until the clustering no longer changes, i.e. the current clustering is the same as the previous one.
 
- Parameters
 - 
  
    | observations | an array of observations to cluster  | 
    | size | size of observations array  | 
    | k | number of clusters to be made | 
  
   
- Returns
 - pointer to cluster object 
 
  145         memset(clusters, 0, 
sizeof(
cluster));
 
  150         clusters = malloc(
sizeof(
cluster) * k);
 
  151         memset(clusters, 0, k * 
sizeof(
cluster));
 
  153         for (
size_t j = 0; j < size; j++)
 
  155             observations[j].
group = rand() % k;
 
  158         size_t minAcceptedError =
 
  165             for (
int i = 0; i < k; i++)
 
  169                 clusters[i].
count = 0;
 
  172             for (
size_t j = 0; j < size; j++)
 
  174                 t = observations[j].
group;
 
  175                 clusters[t].
x += observations[j].
x;
 
  176                 clusters[t].
y += observations[j].
y;
 
  179             for (
int i = 0; i < k; i++)
 
  181                 clusters[i].
x /= clusters[i].
count;
 
  182                 clusters[i].
y /= clusters[i].
count;
 
  186             for (
size_t j = 0; j < size; j++)
 
  189                 if (t != observations[j].group)
 
  192                     observations[j].
group = t;
 
  195         } 
while (changed > minAcceptedError);  
 
  204         memset(clusters, 0, k * 
sizeof(
cluster));
 
  205         for (
int j = 0; j < size; j++)
 
  207             clusters[j].
x = observations[j].
x;
 
  208             clusters[j].
y = observations[j].
y;
 
  209             clusters[j].
count = 1;
 
  210             observations[j].
group = j;
 
 
 
 
 
 
int group
the number of the group to which this observation is assigned
Definition: k_means_clustering.c:42
 
int calculateNearst(observation *o, cluster clusters[], int k)
Definition: k_means_clustering.c:69
 
void calculateCentroid(observation observations[], size_t size, cluster *centroid)
Definition: k_means_clustering.c:97
 
double x
abscissa of 2D data point
Definition: k_means_clustering.c:40
 
Definition: k_means_clustering.c:53
 
double y
ordinate of centroid of this cluster
Definition: k_means_clustering.c:55
 
size_t count
count of observations present in this cluster
Definition: k_means_clustering.c:56
 
double x
abscissa of centroid of this cluster
Definition: k_means_clustering.c:54
 
double y
ordinate of 2D data point
Definition: k_means_clustering.c:41