Sep-06-2022, 09:44 PM
# Create a pipeline that extracts features from the data then creates (evaluates) a model import pandas as pd import numpy as np from sklearn.model_selection import KFold from sklearn.model_selection import cross_val_score from sklearn.pipeline import Pipeline from sklearn.pipeline import FeatureUnion from sklearn.linear_model import LogisticRegression from sklearn.decomposition import PCA from sklearn.feature_selection import SelectKBest filename = 'pima-indians-diabetes.data.csv' names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'] dataframe = pd.read_csv(filename, names=names) array = dataframe.values X = array[:,0:8] y = array[:,8] features = [] features.append(('pca', PCA(n_components=3))) features.append(('select_best', SelectKBest(k=6))) feature_union = FeatureUnion(features) # create pipeline estimators = [] estimators.append(('feature_union', feature_union)) estimators.append(('logistic', LogisticRegression(solver='newton-cg'))) model = Pipeline(estimators) # evaluate pipeline seed = 7 kfold = KFold(n_splits=10, shuffle=True, random_state=seed) results = cross_val_score(model, X, y, cv=kfold) print(results.mean())