Question

Please see Question.pdf

Solution Preview

This material may consist of step-by-step explanations on how to solve a problem or examples of proper writing, including the use of citations, references, bibliographies, and formatting. This material is made available for the sole purpose of studying and learning - misuse is strictly forbidden.

],
   "source": [
    "X, y = make_blobs(n_samples=300, centers =3, cluster_std=3.5,random_state=42)\n",
    "plt.figure(figsize=(8, 6))\n",
    "ax=plt.subplot(111)\n",
    "ax.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap='viridis')\n",
    "plt.show()"
   ]
},
{
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "URtzpJITZ04e"
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Hold out 40% of the points as the test set; the fixed seed keeps the split reproducible.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.4, random_state=0\n",
    ")"
   ]
},
{
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "MHhIZTnVba0U"
   },
   "source": [
    "## 1."
   ]
},
{
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "MNNnB5IAbaUA"
   },
   "outputs": [],
   "source": [
    "# Semi-supervised k-means: centroids are seeded from the labelled training\n",
    "# points, then the test points are repeatedly assigned to their nearest\n",
    "# centroid until no centroid moves by epsilon or more (or max_iter is hit).\n",
    "k = 3\n",
    "\n",
    "epsilon = 0.00001\n",
    "max_iter = 10000\n",
    "\n",
    "\n",
    "def dist(row_a, row_b):\n",
    "    \"\"\"Return the Euclidean distance between two equal-length points.\"\"\"\n",
    "    d = 0\n",
    "    for i in range(len(row_a)):\n",
    "        d += (row_a[i] - row_b[i]) ** 2\n",
    "    return math.sqrt(d)\n",
    "\n",
    "\n",
    "df = pd.DataFrame(X_train)\n",
    "n_test = len(X_test)\n",
    "\n",
    "# Training rows keep their true label as a fixed \"pseudo\" label.\n",
    "df[\"pseudo\"] = y_train\n",
    "\n",
    "# Initial centroids: per-label mean of the training points.\n",
    "centroids = df.groupby(\"pseudo\").mean()\n",
    "\n",
    "index = 0\n",
    "df_test = None\n",
    "while index < max_iter:\n",
    "    # Assign every test point to the closest current centroid.\n",
    "    pseudo_labels = []\n",
    "    for i in range(n_test):\n",
    "        dis = [dist(centroids.iloc[cluster], X_test[i]) for cluster in range(k)]\n",
    "        pseudo_labels.append(dis.index(min(dis)))\n",
    "\n",
    "    df_test = pd.DataFrame(X_test)\n",
    "    df_test[\"pseudo\"] = pseudo_labels\n",
    "\n",
    "    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.\n",
    "    df3 = pd.concat([df, df_test], ignore_index=True)\n",
    "    centroids_new = df3.groupby(\"pseudo\").mean()\n",
    "\n",
    "    # Stop once every centroid has moved by less than epsilon.\n",
    "    all_converged = 0\n",
    "    for i in range(len(centroids)):\n",
    "        if dist(centroids.iloc[i], centroids_new.iloc[i]) < epsilon:\n",
    "            all_converged += 1\n",
    "    if all_converged == k:\n",
    "        break\n",
    "    centroids = centroids_new\n",
    "\n",
    "    index += 1\n",
    "\n",
    "# Attach the true labels so later cells can compare them with the pseudo labels.\n",
    "df_test[\"y\"] = y_test\n",
    "df[\"y\"] = y_train"
   ]
},
{
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "rFYcDlko-5Tq"
   },
   "source": [
    "## 2."
   ]
},
{
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 190
    },
    "colab_type": "code",
    "id": "NN6hYscT8ctk",
    "outputId": "def713ac-5095-4e15-de09-9e12f2b663be"
   },
   "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
      "Mean of each cluster:\n"
    ]
    },
    {
    "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "       vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "       vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "       text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       " <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>y</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       " </thead>\n",
       " <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-2.836760</td>\n",
       "      <td>9.192985</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.654930</td>\n",
       "      <td>2.271495</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-6.839723</td>\n",
       "      <td>-7.237148</td>\n",
       "    </tr>\n",
       " </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1\n",
       "y                   \n",
       "0 -2.836760 9.192985\n",
       "1 4.654930 2.271495\n",
       "2 -6.839723 -7.237148"
      ]
    },
    "execution_count": 42,
    "metadata": {
      "tags": []
    },
    "output_type": "execute_result"
    }
   ],
   "source": [
    "# Set the inferred cluster labels aside and drop them from the frame so the\n",
    "# per-class means below cover only the feature columns.\n",
    "test_pseudo = df_test.pop(\"pseudo\")\n",
    "mean_mat = df_test.groupby(\"y\").mean()\n",
    "print(\"Mean of each cluster:\")\n",
    "mean_mat"
   ]
},
{
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 190
    },
    "colab_type": "code",
    "id": "aQvDj9dT8p3r",
    "outputId": "22cd8019-8c58-4cf2-ee74-f74ad790eb86"
   },
   "outputs": [
    {
    "name": "stdout...
$50.00 for this solution

PayPal, G Pay, ApplePay, Amazon Pay, and all major credit cards accepted.

Find A Tutor

View available Machine Learning Tutors

Get College Homework Help.

Are you sure you don't want to upload any files?

Fast tutor response requires as much info as possible.

Decision:
Upload a file
Continue without uploading

SUBMIT YOUR HOMEWORK
We couldn't find that subject.
Please select the best match from the list below.

We'll send you an email right away. If it's not in your inbox, check your spam folder.

  • 1
  • 2
  • 3
Live Chats