Naive Bayes Classification

In [1]:
from collections import Counter
from math import sqrt

Data

Marital status Buyer
Divorced Yes
Married No
Unmarried Yes
Unmarried Yes
Married No
Divorced Yes
Divorced Yes
Divorced Yes
Divorced Yes
Unmarried No
In [2]:
# Training samples, one per row: [marital status, buyer label ("Yes" / "No")].
data = [
    ["Divorced", "Yes"],
    ["Divorced", "Yes"],
    ["Married", "No"],
    ["Unmarried", "Yes"],
    ["Unmarried", "Yes"],
    ["Married", "No"],
    ["Divorced", "Yes"],
    ["Divorced", "Yes"],
    ["Divorced", "Yes"],
    ["Unmarried", "No"]
]

Separate data by class

In [102]:
def separate(data):
    """Group the last element of every row by the row's first element.

    Args:
        data: Sequence of rows; each row's first item is used as the
            grouping key and its last item is the value that is collected.

    Returns:
        dict mapping each distinct first item to the list of last items of
        the rows that carry it, in the order the rows appear in ``data``.
    """
    grouped = {}
    for row in data:
        # setdefault creates the bucket the first time a key is seen.
        grouped.setdefault(row[0], []).append(row[-1])
    return grouped

# Group the buyer labels in `data` by marital status and show the grouping.
sep_data =  separate(data)
print(sep_data)
{'Divorced': ['Yes', 'Yes', 'Yes', 'Yes', 'Yes'], 'Married': ['No', 'No'], 'Unmarried': ['Yes', 'Yes', 'No']}

Count data cardinality by class

In [101]:
def cardinality(data):
    """Count the labels stored under every key.

    Args:
        data: dict mapping a key to a list of labels.

    Returns:
        dict with the same keys; each value is a three-item list
        ``[number of labels, number of "Yes", number of "No"]``.
    """
    return {
        key: [len(labels), labels.count("Yes"), labels.count("No")]
        for key, labels in data.items()
    }

# For every key of sep_data: [number of labels, number of "Yes", number of "No"].
car_data = cardinality(sep_data)
print(car_data)
{'Divorced': [5, 5, 0], 'Married': [2, 0, 2], 'Unmarried': [3, 2, 1]}

Each list holds three counts for that marital status:

index 0 (class_attribute0_cardinality) == |Yes| + |No|

index 1 (class_attribute1_cardinality) == |Yes|

index 2 (class_attribute2_cardinality) == |No|

Count data cardinality

In [99]:
def counter(data):
    """Add up the per-key counts into overall totals.

    Args:
        data: dict whose values are indexable count lists; index 1 and
            index 2 of every value are summed.

    Returns:
        Two-item list ``[sum of index-1 entries, sum of index-2 entries]``.
    """
    yes_total = sum(counts[1] for counts in data.values())
    no_total = sum(counts[2] for counts in data.values())
    return [yes_total, no_total]

# Totals over all keys of car_data: [sum of index-1 counts, sum of index-2 counts].
count = counter(car_data)
print(count)
[7, 3]

count[0] = |Yes| (total number of "Yes" labels)

count[1] = |No| (total number of "No" labels)

Bayes' Theorem (the basis of the Naive Bayes classifier)

$P(class | data) = \frac{P(data | class) P(class)}{P(data)}$

In [112]:
def calc(data, class_value):
    """Score the "Yes" and "No" labels for one attribute value.

    For each label the score is the numerator of Bayes' theorem,
    ``P(class_value | label) * P(label)``, estimated from the counts in
    ``data``. The scores are NOT divided by ``P(class_value)``, so they do
    not sum to 1; divide each by their sum to obtain posterior probabilities.

    Args:
        data: Sequence of rows; the first item of a row is the attribute
            value and the last item is the label ("Yes" or "No").
        class_value: Attribute value to classify (e.g. "Unmarried").

    Returns:
        Three-item list ``[answer, score_yes, score_no]`` where ``answer`` is
        "Yes" when ``score_yes > score_no`` and "No" otherwise (ties give
        "No").

    Raises:
        KeyError: if ``class_value`` does not occur in ``data``.
    """
    car_data = cardinality(separate(data))
    if class_value not in car_data:
        raise KeyError("unknown attribute value: {!r}".format(class_value))

    yes_total, no_total = counter(car_data)
    total = yes_total + no_total
    _, yes_count, no_count = car_data[class_value]

    # likelihood * prior == (count / label_total) * (label_total / total).
    # A label that never occurs has prior 0, so its score is 0; the guard
    # also avoids dividing by a zero label total.
    p0 = yes_count / yes_total * yes_total / total if yes_total else 0.0
    p1 = no_count / no_total * no_total / total if no_total else 0.0

    if p0 > p1:
        return ["Yes", p0, p1]
    return ["No", p0, p1]

Predict answer for Unmarried

In [114]:
# Classify the "Unmarried" marital status using the training rows in `data`.
calc(data, "Unmarried")
Out[114]:
['Yes', 0.2, 0.1]

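['Answer', score_for_yes, score_for_no]

Each score is the numerator $P(data | class) P(class)$ of Bayes' theorem. The scores are not divided by $P(data)$, so they do not sum to 1; dividing each by their sum ($0.2 / 0.3 \approx 0.67$ for Yes, $0.1 / 0.3 \approx 0.33$ for No) gives the posterior probabilities.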