import pandas as pd
from time import time

# TODO: Read the five datasets using pd.read_csv()

# append the dataframes of each of the five datasets to dfs, running times of each dataset to pd_running_times

# Accumulators for the pandas (CPU) CSV-read step. Both start empty:
#   dfs              - receives the DataFrame loaded from each dataset
#   pd_running_times - receives the read time measured for each dataset
# NOTE: the read-time plot labels its y-axis in milliseconds, whereas
# time.time() reports seconds - convert accordingly.
dfs, pd_running_times = [], []


from sklearn.cluster import KMeans as sklearnKMeans
from time import time

# TODO: Run KMeans using sklearnKMeans on each dataset with n_clusters = 10

# Measure the sklearn KMeans running time on each dataset and append it to
# sklearn_running_times. The list starts empty and is later used as the CPU
# series of the KMeans comparison plot.
# NOTE: that plot labels its y-axis in milliseconds, whereas time.time()
# reports seconds - convert accordingly.
sklearn_running_times = []


import cudf

# TODO: Read the five datasets using cudf.read_csv()

# append the dataframes of each of the five datasets to gdfs, running times of each dataset to cudf_running_times

# Accumulators for the cuDF (GPU) CSV-read step. Both start empty:
#   gdfs               - receives the cuDF DataFrame loaded from each dataset
#   cudf_running_times - receives the read time measured for each dataset
# NOTE: the read-time plot labels its y-axis in milliseconds, whereas
# time.time() reports seconds - convert accordingly.
gdfs, cudf_running_times = [], []


from cuml.cluster import KMeans as cumlKMeans
from time import time

# TODO: Run KMeans using cumlKMeans on each dataset with n_clusters = 10

# Measure the cuML KMeans running time on each dataset and append it to
# cuml_running_times. The list starts empty and is later used as the GPU
# series of the KMeans comparison plot.
# NOTE: that plot labels its y-axis in milliseconds, whereas time.time()
# reports seconds - convert accordingly.

cuml_running_times = []


import matplotlib.pyplot as plt

# X holds the number of lines of data in each data{0, 1, 2, 3, 4}.csv file,
# in file order; it is the x-axis of the comparison plot.
X = [25000, 63115, 200000, 300000, 1419554]

# Series for the CSV-read comparison: y is the pandas timings, z the cuDF
# timings (same list objects, not copies).
y, z = pd_running_times, cudf_running_times

#TODO: Plot a simultaneous graph with values of running times to read data of both pandas and cudf, i.e., pd_running_times and cudf_running_times

#X axis is the number of lines and Y axis has the time in milliseconds. Provide a legend to differentiate pd_running_times and cudf_running_times


# Re-point the plot series at the KMeans timings: y is the sklearn (CPU)
# list, z the cuML (GPU) list (same list objects, not copies).
y, z = sklearn_running_times, cuml_running_times

#TODO: Plot a simultaneous graph with values of running times to perform k-means using both sklearn and cuml, i.e., sklearn_running_times and cuml_running_times

#X axis is the number of lines and Y axis has the time in milliseconds. Provide a legend to differentiate sklearn_running_times and cuml_running_times

# Download Data

# Part 1. KMeans with CPU

# Q1. Read the five provided datasets

# Q2. Perform KMeans implemented in sklearn

# Part 2. KMeans with GPU

# Q3. Read datasets with cuDF

# Q4. Perform KMeans implemented in cuML

# Part 3. Plot the values