Purpose: Understand how social media platforms use data to find new potential clients
Step 1: Read Data
# Read the ad-campaign data set from the CSV file.
# stringsAsFactors = TRUE is stated explicitly so that text columns such as
# Gender are loaded as factors (the summary() output below counts
# Female/Male levels). Since R 4.0.0 read.csv() would otherwise leave them
# as plain character vectors.
social <- read.csv("Social_Network_Ads.csv", stringsAsFactors = TRUE)
Step 2: Explore and prepare the data
# Print per-column summary statistics to inspect the value ranges of each
# variable before modelling
summary(social)
## User.ID Gender Age EstimatedSalary
## Min. :15566689 Female:204 Min. :18.00 Min. : 15000
## 1st Qu.:15626764 Male :196 1st Qu.:29.75 1st Qu.: 43000
## Median :15694342 Median :37.00 Median : 70000
## Mean :15691540 Mean :37.66 Mean : 69742
## 3rd Qu.:15750363 3rd Qu.:46.00 3rd Qu.: 88000
## Max. :15815236 Max. :60.00 Max. :150000
## Purchased
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.3575
## 3rd Qu.:1.0000
## Max. :1.0000
# The first column (User.ID) is only an identifier and is useless for
# classification, so drop it
social <- social[-1]
# Recode the numeric outcome as a two-level factor:
# 0 becomes "No", any other value becomes "Yes"
social$Purchased <- factor(
  social$Purchased != 0,
  levels = c(FALSE, TRUE),
  labels = c("No", "Yes")
)
Step 3: Split the data into training and validation sets
library(caTools)
# Fix the RNG seed so the random partition is reproducible
set.seed(2017)
# Share of rows that goes into the training set
SplitRatio <- 0.75
# Logical mask over the rows of `social`: TRUE marks a training row.
# sample.split() is given the outcome column; per the caTools documentation
# it uses those labels to keep the class ratio similar in both parts.
split <- sample.split(social$Purchased, SplitRatio = SplitRatio)
train.sample <- social[split, ]
valid.sample <- social[!split, ]
Step 4: Train a Naive Bayes model on the training dataset
library(e1071)
# Fit a Naive Bayes classifier that predicts Purchased from the three
# predictor columns, using only the training rows
classifier <- naiveBayes(
  Purchased ~ Gender + Age + EstimatedSalary,
  data = train.sample
)
Step 5: Evaluate the performance of the model
# Predict a class label ("No"/"Yes") for every row of the validation set
purchased.pred <- predict(classifier, newdata = valid.sample, type = "class")
library(gmodels)
# Confusion matrix: actual labels in rows, predicted labels in columns.
# Column 4 of valid.sample is Purchased once User.ID has been dropped.
# The former `addmargins = FALSE` argument was removed: it is not a
# CrossTable() parameter and was silently ignored (row and column totals
# are printed regardless), so the printed table is unchanged.
CrossTable(
  x = valid.sample[, 4],
  y = purchased.pred,
  prop.chisq = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 100
##
##
## | purchased.pred
## valid.sample[, 4] | No | Yes | Row Total |
## ------------------|-----------|-----------|-----------|
## No | 59 | 5 | 64 |
## | 0.922 | 0.078 | 0.640 |
## | 0.952 | 0.132 | |
## | 0.590 | 0.050 | |
## ------------------|-----------|-----------|-----------|
## Yes | 3 | 33 | 36 |
## | 0.083 | 0.917 | 0.360 |
## | 0.048 | 0.868 | |
## | 0.030 | 0.330 | |
## ------------------|-----------|-----------|-----------|
## Column Total | 62 | 38 | 100 |
## | 0.620 | 0.380 | |
## ------------------|-----------|-----------|-----------|
##
##
No comments:
Post a Comment