I am trying to build a simple decision tree, but I keep getting the same ValueError, and none of the similar threads were of any help. None of my variables are strings, but I still get a conversion error.
from pandas import Series, DataFrame
import pandas as pd
import numpy as np
import os
import matplotlib.pylab as plt
from sklearn.cross_validation import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
import sklearn.metrics
# Raw string: "\M" is not a valid escape sequence, so the plain literal only
# works by accident and raises a warning on recent Python versions. The
# resulting path is identical.
os.chdir(r"C:\Mlearning")
"""
Data Engineering and Analysis
"""
#Load the dataset
AH_data = pd.read_csv("gapminder.csv")
# The reported "could not convert string to float:" error shows an empty
# string reaching the estimator, i.e. some cells are blank strings rather
# than NaN, so dropna() alone does not remove them. Coerce the columns used
# by the model to numeric first: anything unparseable becomes NaN and is then
# dropped together with the genuinely missing values.
model_columns = ['breastcancerper100th', 'alcconsumption', 'employrate']
AH_data[model_columns] = AH_data[model_columns].apply(pd.to_numeric, errors='coerce')
data_clean = AH_data.dropna()
# Handy diagnostics when run interactively:
#data_clean.dtypes
#data_clean.describe()
"""
Modeling and Prediction
"""
#Split into training and testing sets
# Two explanatory columns are used to predict the employment rate; 40% of the
# rows are held out for testing.
predictors = data_clean[['breastcancerper100th','alcconsumption']]
targets = data_clean.employrate
pred_train, pred_test, tar_train, tar_test = train_test_split(predictors, targets, test_size=.4)
# Bare expressions are only echoed in an interactive session; print the shapes
# so they are also visible when the file is run as a script.
print(pred_train.shape)
print(pred_test.shape)
print(tar_train.shape)
print(tar_test.shape)
#Build model on training data
# NOTE(review): the target (employrate) looks like a continuous quantity; a
# classifier treats every distinct value as its own class. Confirm whether a
# DecisionTreeRegressor (with regression metrics) or a binned target is what
# is actually intended.
classifier=DecisionTreeClassifier()
classifier=classifier.fit(pred_train,tar_train)
predictions=classifier.predict(pred_test)
# Print the evaluation results; as bare expressions they were computed and
# then discarded when the file was run as a script.
print(sklearn.metrics.confusion_matrix(tar_test,predictions))
print(sklearn.metrics.accuracy_score(tar_test, predictions))
# Render the fitted decision tree to "graph.pdf" through Graphviz.
import pydotplus
from io import StringIO  # on Python 2 this was: from StringIO import StringIO
from IPython.display import Image
from sklearn import tree

# export_graphviz writes the DOT description into an in-memory text buffer.
out = StringIO()
tree.export_graphviz(classifier, out_file=out)

# Parse the DOT text and let pydotplus produce the PDF file.
dot_source = out.getvalue()
graph = pydotplus.graph_from_dot_data(dot_source)
graph.write_pdf("graph.pdf")
But the result that I am getting is this one:
array = np.array(array, dtype=dtype, order=order, copy=copy)
ValueError: could not convert string to float:
Is the error raised in `classifier.fit`, or somewhere else? Can you post a sample of the data you are trying to classify? `data_clean.dtypes` would be useful, too (and perhaps `data_clean.head()`, if you can share it). Also, consider using `DecisionTreeRegressor` instead. We'll be able to help much better if you post a traceback, so that we can see which line the `ValueError` is coming from.