% Two-fold hold-out evaluation of a k-nearest-neighbor classifier on the
% abalone data set.
%
% Steps:
%   1. Load "abalone.dat"; the last column is the output (age), all
%      preceding columns are features.
%   2. Report and plot the age distribution.
%   3. Collapse the output into two classes: 1 (age < 10), 2 (age >= 10).
%   4. Normalize the features and split the instances into odd-indexed and
%      even-indexed halves.
%   5. Classify each half using the other half as the sample set, and
%      print the recognition rate for both passes.
%
% Depends on countele, normal and knnr, which are not defined in this file.

close all       % Close all figure windows
clear           % Clear workspace variables ("clear all" would also purge
                % compiled functions and breakpoints, which is not needed)

fprintf('Loading "abalone.dat"...\n');
load abalone.dat                        % Load the data set

feature_n = size(abalone, 2)-1;         % no. of features
instance_n = size(abalone, 1);          % no. of instances
feature = abalone(:, 1:feature_n);      % feature matrix
output = abalone(:, feature_n+1);       % output column (age)

% presumably a holds the distinct output values and b their counts
% (countele is defined elsewhere) -- TODO confirm
[a, b] = countele(output);
class_n = length(a);                    % No. of classes

fprintf('%g features\n', feature_n);
fprintf('%g instances\n', instance_n);
fprintf('%g classes\n', class_n);

% Plot age distribution
bar(a, b);
xlabel('Age');
ylabel('Counts');
title('Age Distribution for the Abalone Data Set');

% Both masks are computed before output is overwritten, so an entry that
% satisfies neither comparison (e.g. NaN) is left untouched.
is_young = output < 10;
is_old = output >= 10;
fprintf('Class 1: %g instances are younger than 10 years\n',...
    nnz(is_young));
fprintf('Class 2: %g instances are equal to or older than 10 years\n',...
    nnz(is_old));

% Modify the data set such that instances younger than 10 years fall
% into class 1; all the others into class 2
output(is_young) = 1;
output(is_old) = 2;

% Data normalization to have zero mean and unity variance
% (as stated for normal, which is defined elsewhere)
new_feature = normal(feature);
data = [new_feature output];

% Partition the data sets for hold-out tests:
% odd-indexed instances in data1, even-indexed instances in data2
index1 = 1:2:instance_n;
index2 = 2:2:instance_n;
data1 = data(index1, :);
data2 = data(index2, :);

k = 3;      % for 3 nearest neighbor

% Alternative kept from the original: classify the whole test set in a
% single call instead of row by row.
%tic
%label = knnr(data1, data2, k);
%toc

% Hold-out tests 1 and 2. In each pass data1 is the sample set and data2
% is the test set; the two sets are swapped between the passes.
for test_i = 1:2
    desired_label = data2(:, feature_n+1);
    test_n = size(data2, 1);
    label = zeros(size(desired_label));

    tic
    for i = 1:test_n
        % Progress report every 100 test instances
        if rem(i, 100)==0
            fprintf('%g/%g\n', i, test_n);
        end
        % Each test row is passed with its label column still attached,
        % matching how knnr is called on the full matrix above.
        label(i) = knnr(data1, data2(i, :), k);
    end
    toc

    right_count = sum(label==desired_label);
    recog_rate = right_count/length(desired_label);
    fprintf('Recognition rate = %g/%g = %g\n', ...
        right_count, test_n, recog_rate);

    % Swap data sets once, between the first and second pass
    if test_i == 1
        temp = data1;
        data1 = data2;
        data2 = temp;
    end
end