Add nltk.download calls for the stopwords and wordnet corpora; re-run notebook

Moro-Code · Moro-Code · commit 5eea65328068 · 2019-03-05T00:26:06.000-05:00
diff --git a/KeywordExtractionNotebook.ipynb b/KeywordExtractionNotebook.ipynb
@@ -32,16 +32,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 19,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...\n",
+      "[nltk_data]   Package stopwords is already up-to-date!\n",
+      "[nltk_data] Downloading package wordnet to /home/jovyan/nltk_data...\n",
+      "[nltk_data]   Unzipping corpora/wordnet.zip.\n"
+     ]
+    }
+   ],
    "source": [
     "# The NLTK corpora used below (stopwords, wordnet) must be available locally;\n",
     "# the nltk.download(...) calls in this cell fetch them when they are missing.\n",
-    "#nltk.download('stopwords')\n",
     "import re\n",
     "import pandas\n",
+    "import nltk\n",
     "from nltk.corpus import stopwords\n",
+    "nltk.download('stopwords')\n",
+    "nltk.download('wordnet')\n",
     "from nltk.stem.wordnet import WordNetLemmatizer\n",
     "from sklearn.feature_extraction.text import CountVectorizer"
    ]
@@ -56,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -180,7 +193,7 @@
        "4  Abstract Missing  U sing a neural net to instantiate a\\ndeformab...  "
       ]
      },
-     "execution_count": 2,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -196,7 +209,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -334,7 +347,7 @@
        "2387  Regularized Boost for Semi-Supervised Learning...  "
       ]
      },
-     "execution_count": 3,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -348,7 +361,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [
     {
@@ -437,7 +450,7 @@
        "4   2387  Semi-supervised inductive learning concerns ho...         119"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 22,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -471,7 +484,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -480,77 +493,77 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "the                      29793\n",
-       "of                       20918\n",
-       "a                        16339\n",
-       "and                      13626\n",
-       "to                       12869\n",
-       "in                        8980\n",
-       "that                      7838\n",
-       "is                        7666\n",
-       "for                       7169\n",
-       "on                        5579\n",
-       "we                        5167\n",
-       "We                        4995\n",
-       "with                      4512\n",
-       "this                      3677\n",
-       "as                        3643\n",
-       "are                       3529\n",
-       "an                        3366\n",
-       "by                        3197\n",
-       "can                       2953\n",
-       "learning                  2825\n",
-       "which                     2781\n",
-       "be                        2673\n",
-       "from                      2622\n",
-       "our                       2446\n",
-       "model                     2408\n",
-       "algorithm                 2262\n",
-       "show                      2214\n",
-       "In                        2203\n",
-       "data                      2143\n",
-       "The                       2109\n",
-       "                         ...  \n",
-       "influenced.                  1\n",
-       "20-layer                     1\n",
-       "quarter                      1\n",
-       "MC-SSDA                      1\n",
-       "HMDB                         1\n",
-       "problems.Despite             1\n",
-       "8\\%                          1\n",
-       "massive-dimensional.         1\n",
-       "harmonically-related         1\n",
-       "make,                        1\n",
-       "IPDFs                        1\n",
-       "methods.Fine-grained         1\n",
-       "unethical).                  1\n",
-       "plausible,                   1\n",
-       "ExtremeHunter                1\n",
-       "planning.Despite             1\n",
-       "Revenge'.We                  1\n",
-       "paths)                       1\n",
-       "O(T^{2/3}                    1\n",
-       "facilitatory                 1\n",
-       "deformations;                1\n",
-       "negatives,                   1\n",
-       "order-$K$                    1\n",
-       "rival                        1\n",
-       "heights                      1\n",
-       "weak-oracle.Mirror           1\n",
-       "fails.                       1\n",
-       "efficiencies.                1\n",
-       "root-tree                    1\n",
-       "practice.Furthermore,        1\n",
-       "Length: 37676, dtype: int64"
+       "the                     2905\n",
+       "of                      2196\n",
+       "a                       1677\n",
+       "and                     1293\n",
+       "to                      1272\n",
+       "in                       994\n",
+       "is                       804\n",
+       "that                     795\n",
+       "for                      747\n",
+       "on                       515\n",
+       "We                       487\n",
+       "we                       460\n",
+       "with                     416\n",
+       "as                       367\n",
+       "are                      363\n",
+       "this                     357\n",
+       "an                       350\n",
+       "learning                 309\n",
+       "be                       305\n",
+       "by                       305\n",
+       "can                      302\n",
+       "which                    299\n",
+       "from                     265\n",
+       "model                    253\n",
+       "The                      241\n",
+       "data                     239\n",
+       "In                       226\n",
+       "show                     211\n",
+       "algorithm                206\n",
+       "our                      190\n",
+       "                        ... \n",
+       "MB.                        1\n",
+       "($p$                       1\n",
+       "smoother                   1\n",
+       "Performance                1\n",
+       "attributes                 1\n",
+       "within-subject             1\n",
+       "constraints;               1\n",
+       "extracted.                 1\n",
+       "extant                     1\n",
+       "perspective.               1\n",
+       "research,                  1\n",
+       "L1-norm.                   1\n",
+       "written                    1\n",
+       "low-load,                  1\n",
+       "equally                    1\n",
+       "0.86--                     1\n",
+       "though,                    1\n",
+       "mapping.                   1\n",
+       "tree.Semi-supervised       1\n",
+       "multi-component            1\n",
+       "surround,                  1\n",
+       "respecting                 1\n",
+       "Sellie                     1\n",
+       "polynomial,                1\n",
+       "energy.In                  1\n",
+       "(STDP).                    1\n",
+       "datasets.Many              1\n",
+       "hierarchies,               1\n",
+       "(Kalai                     1\n",
+       "creates                    1\n",
+       "Length: 8881, dtype: int64"
       ]
      },
-     "execution_count": 47,
+     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -576,7 +589,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
@@ -585,7 +598,7 @@
        "' Non negative matrix factorization NMF has previously been shown to be a useful decomposition for multivariate data Two different multi plicative algorithms for NMF are analyzed They differ only slightly in the multiplicative factor used in the update rules One algorithm can be shown to minimize the conventional least squares error while the other minimizes the generalized Kullback Leibler divergence The monotonic convergence of both algorithms can be proven using an auxiliary func tion analogous to that used for proving convergence of the Expectation Maximization algorithm The algorithms can also be interpreted as diag onally rescaled gradient descent where the rescaling factor is optimally chosen to ensure convergence  Spike triggered averaging techniques are effective for linear characterization of neural responses But neurons exhibit important nonlinear behaviors such as gain control that are not captured by such analyses We describe a spike triggered covariance method for retrievi'"
       ]
      },
-     "execution_count": 53,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -615,7 +628,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -625,7 +638,7 @@
        " 'Spike triggered averaging technique effective linear characterization neural response But neuron exhibit important nonlinear behavior gain control captured analysis We describe spike triggered covariance method retrieving suppressive component gain control signal neuron We demonstrate method simulation retinal ganglion cell data Analysis physiological data reveals significant suppressive ax explains neural nonlinearities This method applicable sensory area modality']"
       ]
      },
-     "execution_count": 59,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -663,7 +676,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 60,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -672,17 +685,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 63,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<3924x10000 sparse matrix of type '<class 'numpy.int64'>'\n",
-       "\twith 265145 stored elements in Compressed Sparse Row format>"
+       "<413x4534 sparse matrix of type '<class 'numpy.int64'>'\n",
+       "\twith 26681 stored elements in Compressed Sparse Row format>"
       ]
      },
-     "execution_count": 63,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -696,7 +709,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 64,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -705,7 +718,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 65,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -714,35 +727,35 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 67,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[('model', 5023),\n",
-       " ('algorithm', 4364),\n",
-       " ('learning', 3604),\n",
-       " ('method', 3328),\n",
-       " ('problem', 3317),\n",
-       " ('data', 3150),\n",
-       " ('show', 2418),\n",
-       " ('approach', 2006),\n",
-       " ('function', 1953),\n",
-       " ('based', 1854),\n",
-       " ('network', 1785),\n",
-       " ('result', 1741),\n",
-       " ('time', 1486),\n",
-       " ('paper', 1459),\n",
-       " ('using', 1455),\n",
-       " ('task', 1402),\n",
-       " ('distribution', 1352),\n",
-       " ('propose', 1351),\n",
-       " ('state', 1277),\n",
-       " ('feature', 1248)]"
+       "[('model', 485),\n",
+       " ('algorithm', 413),\n",
+       " ('learning', 398),\n",
+       " ('data', 356),\n",
+       " ('method', 338),\n",
+       " ('problem', 304),\n",
+       " ('show', 234),\n",
+       " ('based', 234),\n",
+       " ('approach', 206),\n",
+       " ('function', 185),\n",
+       " ('using', 167),\n",
+       " ('result', 164),\n",
+       " ('paper', 149),\n",
+       " ('task', 142),\n",
+       " ('image', 141),\n",
+       " ('set', 134),\n",
+       " ('feature', 133),\n",
+       " ('kernel', 130),\n",
+       " ('present', 129),\n",
+       " ('new', 126)]"
       ]
      },
-     "execution_count": 67,
+     "execution_count": 31,
      "metadata": {},
      "output_type": "execute_result"
     }