K-Fold Cross-Validation, Accuracy, Precision, Recall

In [1]:
from matplotlib.widgets import Button
import matplotlib.pyplot as plt
from math import sqrt
import numpy as np

Data

Each point is stored as [y, x, color]; the color is the point's class label.

In [2]:
# Labelled sample set. Every entry is [y, x, color]; the color doubles as
# the class label used by the classifier further down.
points = [
    [1, 1, "Red"],
    [1, 4, "Red"],
    [1, 9, "Black"],
    [5, 3, "Black"],
    [5, 6, "Red"],
    [6, 1, "Black"],
    [6, 2, "Black"],
    [6, 5, "Black"],
    [6, 8, "Red"],
    [8, 1, "Red"],
    [9, 3, "Black"],
    [2, 9, "Black"],
]

plt.title("Points")
plt.xlabel("x")
plt.ylabel("y")

# Draw one marker per sample. Legend labels run 'A', 'B', 'C', ... in list
# order, so each point can be identified on the figure.
char = 'A'
for point in points:
    plt.plot(
        point[1],  # x coordinate (second field)
        point[0],  # y coordinate (first field)
        "o",
        label=char,
        color=point[2],
    )
    char = chr(ord(char) + 1)

plt.legend()
plt.show()

Function for Euclidean distance

$d_{ij} = \sqrt{ \sum_{k=1}^{p} (X_{ik} - X_{jk})^2 }$

In [3]:
def euc_dis(point1, point2):
    """Return the Euclidean distance between two points in the plane.

    Only the first two entries of each point are used as coordinates, so
    any further entries (such as a color label) are ignored.
    """
    delta_first = point1[0] - point2[0]
    delta_second = point1[1] - point2[1]
    squared_length = delta_first ** 2 + delta_second ** 2
    return sqrt(squared_length)

Split a list into n parts

In [4]:
def split(seq, num):
    """Split ``seq`` into ``num`` consecutive chunks of near-equal size.

    Chunk boundaries are computed with integer arithmetic. Accumulating a
    floating-point step instead can drift just below ``len(seq)`` and
    produce an extra, overlapping chunk (for example when splitting one
    element into ten parts).

    Parameters
    ----------
    seq : sequence
        Any sliceable sequence (list, tuple, string, ...).
    num : int
        Number of chunks to produce; must be at least 1.

    Returns
    -------
    list
        ``num`` slices of ``seq`` that together cover it exactly once and
        in order; sizes differ by at most one. When ``num`` exceeds
        ``len(seq)`` some chunks are empty. An empty ``seq`` yields ``[]``.

    Raises
    ------
    ValueError
        If ``num`` is smaller than 1.
    """
    parts = int(num)
    if parts < 1:
        raise ValueError("num must be a positive integer")

    total = len(seq)
    if total == 0:
        # Nothing to distribute: return no chunks at all.
        return []

    # Boundary k sits at floor(k * total / parts), so the last boundary is
    # exactly ``total`` and every element lands in exactly one chunk.
    return [
        seq[k * total // parts:(k + 1) * total // parts]
        for k in range(parts)
    ]

Function to find nearest neighbor

In [5]:
def calc(training_data, testing_data):
    """Classify each test point by its nearest neighbor in the training set.

    Parameters
    ----------
    training_data : list
        Labelled points; the label is read from index 2 of each point and
        the position is whatever ``euc_dis`` reads from it.
    testing_data : list
        Points to classify.

    Returns
    -------
    list
        One ``[index, label]`` pair per test point, in order, where
        ``index`` is the position within ``testing_data`` and ``label`` is
        the label of the closest training point. On ties the training
        point that appears first wins. If ``training_data`` is empty the
        label is the placeholder ``0``.
    """
    result = []
    for i, query in enumerate(testing_data):
        # Start from infinity so that the nearest training point is always
        # found, however far away it is. A fixed finite cut-off would leave
        # distant test points with the placeholder label.
        best_distance = float("inf")
        label = 0
        for point in training_data:
            distance = euc_dis(query, point)
            if distance < best_distance:
                best_distance = distance
                label = point[2]
        result.append([i, label])
    return result

Compute the confusion counts (TP, FN, FP, TN) for each fold, treating "Red" as the positive class

In [6]:
# Number of cross-validation folds.
N_FOLDS = 3
splited = split(points, N_FOLDS)

# results[i] holds the [index, predicted_color] pairs for fold i;
# values[i] holds the held-out points of fold i (the ground truth).
results = []
values = []
for i in range(0, len(splited)):
    testing_data = splited[i]
    # Train on every point that is not part of the held-out fold.
    training_data = [x for x in points if x not in testing_data]

    results.append(calc(training_data, testing_data))
    values.append(testing_data)


# One confusion-count row per fold, with "Red" as the positive class.
status = []
for fold_results, fold_values in zip(results, values):
    # [TP, FN, FP, TN]
    counts = [0, 0, 0, 0]
    # Walk this fold's own predictions. Folds may differ in size, so the
    # length of the first fold must not be reused for the others.
    for prediction, actual_point in zip(fold_results, fold_values):
        predicted = prediction[1]
        actual = actual_point[2]
        if predicted == actual:
            if predicted == "Red":
                counts[0] += 1  # true positive
            else:
                counts[3] += 1  # true negative
        else:
            if predicted == "Red":
                counts[2] += 1  # false positive
            else:
                counts[1] += 1  # false negative
    status.append(counts)

Accuracy

$\frac{TP+TN}{TP+TN+FP+FN}$

Precision

$\frac{TP}{TP+FP}$

Recall

$\frac{TP}{TP+FN}$

In [7]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero.

    A fold with no positive predictions (TP + FP == 0) or no positive
    samples (TP + FN == 0) has an undefined precision or recall; it is
    scored as 0.0 instead of aborting with ZeroDivisionError.
    """
    return numerator / denominator if denominator else 0.0


# Running sums of the per-fold metrics; turned into means below.
ac = 0
pr = 0
re = 0

# Each row of ``status`` is [TP, FN, FP, TN] for one fold.
for tp, fn, fp, tn in status:
    ac += _safe_ratio(tp + tn, tp + tn + fp + fn)
    pr += _safe_ratio(tp, tp + fp)
    re += _safe_ratio(tp, tp + fn)

# Average over the folds actually evaluated rather than a fixed count.
# ``or 1`` keeps the division defined when there are no folds at all.
fold_count = len(status) or 1
ac /= fold_count
pr /= fold_count
re /= fold_count

print(
    "Accuracy:  ", ac, "\n"
    "Precision: ", pr, "\n"
    "Recall:    ", re
)
Accuracy:   0.75 
Precision:  0.8333333333333334 
Recall:     0.6666666666666666