Skip to content

Commit 5eea653

Browse files
committed
nltk downloads
1 parent 5e75b89 commit 5eea653

File tree

1 file changed

+120
-107
lines changed

1 file changed

+120
-107
lines changed

KeywordExtractionNotebook.ipynb

+120-107
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,29 @@
3232
},
3333
{
3434
"cell_type": "code",
35-
"execution_count": 1,
35+
"execution_count": 19,
3636
"metadata": {},
37-
"outputs": [],
37+
"outputs": [
38+
{
39+
"name": "stdout",
40+
"output_type": "stream",
41+
"text": [
42+
"[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...\n",
43+
"[nltk_data] Package stopwords is already up-to-date!\n",
44+
"[nltk_data] Downloading package wordnet to /home/jovyan/nltk_data...\n",
45+
"[nltk_data] Unzipping corpora/wordnet.zip.\n"
46+
]
47+
}
48+
],
3849
"source": [
3950
"#If this is the first time importing the stopwords package /\n",
4051
"#you must first download the stopwords using nltk.download('stopwords')\n",
41-
"#nltk.download('stopwords')\n",
4252
"import re\n",
4353
"import pandas\n",
54+
"import nltk\n",
4455
"from nltk.corpus import stopwords\n",
56+
"nltk.download('stopwords')\n",
57+
"nltk.download('wordnet')\n",
4558
"from nltk.stem.wordnet import WordNetLemmatizer\n",
4659
"from sklearn.feature_extraction.text import CountVectorizer"
4760
]
@@ -56,7 +69,7 @@
5669
},
5770
{
5871
"cell_type": "code",
59-
"execution_count": 2,
72+
"execution_count": 20,
6073
"metadata": {},
6174
"outputs": [
6275
{
@@ -180,7 +193,7 @@
180193
"4 Abstract Missing U sing a neural net to instantiate a\\ndeformab... "
181194
]
182195
},
183-
"execution_count": 2,
196+
"execution_count": 20,
184197
"metadata": {},
185198
"output_type": "execute_result"
186199
}
@@ -196,7 +209,7 @@
196209
},
197210
{
198211
"cell_type": "code",
199-
"execution_count": 3,
212+
"execution_count": 21,
200213
"metadata": {},
201214
"outputs": [
202215
{
@@ -334,7 +347,7 @@
334347
"2387 Regularized Boost for Semi-Supervised Learning... "
335348
]
336349
},
337-
"execution_count": 3,
350+
"execution_count": 21,
338351
"metadata": {},
339352
"output_type": "execute_result"
340353
}
@@ -348,7 +361,7 @@
348361
},
349362
{
350363
"cell_type": "code",
351-
"execution_count": 4,
364+
"execution_count": 22,
352365
"metadata": {},
353366
"outputs": [
354367
{
@@ -437,7 +450,7 @@
437450
"4 2387 Semi-supervised inductive learning concerns ho... 119"
438451
]
439452
},
440-
"execution_count": 4,
453+
"execution_count": 22,
441454
"metadata": {},
442455
"output_type": "execute_result"
443456
}
@@ -471,7 +484,7 @@
471484
},
472485
{
473486
"cell_type": "code",
474-
"execution_count": 46,
487+
"execution_count": 23,
475488
"metadata": {},
476489
"outputs": [],
477490
"source": [
@@ -480,77 +493,77 @@
480493
},
481494
{
482495
"cell_type": "code",
483-
"execution_count": 47,
496+
"execution_count": 24,
484497
"metadata": {},
485498
"outputs": [
486499
{
487500
"data": {
488501
"text/plain": [
489-
"the 29793\n",
490-
"of 20918\n",
491-
"a 16339\n",
492-
"and 13626\n",
493-
"to 12869\n",
494-
"in 8980\n",
495-
"that 7838\n",
496-
"is 7666\n",
497-
"for 7169\n",
498-
"on 5579\n",
499-
"we 5167\n",
500-
"We 4995\n",
501-
"with 4512\n",
502-
"this 3677\n",
503-
"as 3643\n",
504-
"are 3529\n",
505-
"an 3366\n",
506-
"by 3197\n",
507-
"can 2953\n",
508-
"learning 2825\n",
509-
"which 2781\n",
510-
"be 2673\n",
511-
"from 2622\n",
512-
"our 2446\n",
513-
"model 2408\n",
514-
"algorithm 2262\n",
515-
"show 2214\n",
516-
"In 2203\n",
517-
"data 2143\n",
518-
"The 2109\n",
519-
" ... \n",
520-
"influenced. 1\n",
521-
"20-layer 1\n",
522-
"quarter 1\n",
523-
"MC-SSDA 1\n",
524-
"HMDB 1\n",
525-
"problems.Despite 1\n",
526-
"8\\% 1\n",
527-
"massive-dimensional. 1\n",
528-
"harmonically-related 1\n",
529-
"make, 1\n",
530-
"IPDFs 1\n",
531-
"methods.Fine-grained 1\n",
532-
"unethical). 1\n",
533-
"plausible, 1\n",
534-
"ExtremeHunter 1\n",
535-
"planning.Despite 1\n",
536-
"Revenge'.We 1\n",
537-
"paths) 1\n",
538-
"O(T^{2/3} 1\n",
539-
"facilitatory 1\n",
540-
"deformations; 1\n",
541-
"negatives, 1\n",
542-
"order-$K$ 1\n",
543-
"rival 1\n",
544-
"heights 1\n",
545-
"weak-oracle.Mirror 1\n",
546-
"fails. 1\n",
547-
"efficiencies. 1\n",
548-
"root-tree 1\n",
549-
"practice.Furthermore, 1\n",
550-
"Length: 37676, dtype: int64"
502+
"the 2905\n",
503+
"of 2196\n",
504+
"a 1677\n",
505+
"and 1293\n",
506+
"to 1272\n",
507+
"in 994\n",
508+
"is 804\n",
509+
"that 795\n",
510+
"for 747\n",
511+
"on 515\n",
512+
"We 487\n",
513+
"we 460\n",
514+
"with 416\n",
515+
"as 367\n",
516+
"are 363\n",
517+
"this 357\n",
518+
"an 350\n",
519+
"learning 309\n",
520+
"be 305\n",
521+
"by 305\n",
522+
"can 302\n",
523+
"which 299\n",
524+
"from 265\n",
525+
"model 253\n",
526+
"The 241\n",
527+
"data 239\n",
528+
"In 226\n",
529+
"show 211\n",
530+
"algorithm 206\n",
531+
"our 190\n",
532+
" ... \n",
533+
"MB. 1\n",
534+
"($p$ 1\n",
535+
"smoother 1\n",
536+
"Performance 1\n",
537+
"attributes 1\n",
538+
"within-subject 1\n",
539+
"constraints; 1\n",
540+
"extracted. 1\n",
541+
"extant 1\n",
542+
"perspective. 1\n",
543+
"research, 1\n",
544+
"L1-norm. 1\n",
545+
"written 1\n",
546+
"low-load, 1\n",
547+
"equally 1\n",
548+
"0.86-- 1\n",
549+
"though, 1\n",
550+
"mapping. 1\n",
551+
"tree.Semi-supervised 1\n",
552+
"multi-component 1\n",
553+
"surround, 1\n",
554+
"respecting 1\n",
555+
"Sellie 1\n",
556+
"polynomial, 1\n",
557+
"energy.In 1\n",
558+
"(STDP). 1\n",
559+
"datasets.Many 1\n",
560+
"hierarchies, 1\n",
561+
"(Kalai 1\n",
562+
"creates 1\n",
563+
"Length: 8881, dtype: int64"
551564
]
552565
},
553-
"execution_count": 47,
566+
"execution_count": 24,
554567
"metadata": {},
555568
"output_type": "execute_result"
556569
}
@@ -576,7 +589,7 @@
576589
},
577590
{
578591
"cell_type": "code",
579-
"execution_count": 53,
592+
"execution_count": 25,
580593
"metadata": {},
581594
"outputs": [
582595
{
@@ -585,7 +598,7 @@
585598
"' Non negative matrix factorization NMF has previously been shown to be a useful decomposition for multivariate data Two different multi plicative algorithms for NMF are analyzed They differ only slightly in the multiplicative factor used in the update rules One algorithm can be shown to minimize the conventional least squares error while the other minimizes the generalized Kullback Leibler divergence The monotonic convergence of both algorithms can be proven using an auxiliary func tion analogous to that used for proving convergence of the Expectation Maximization algorithm The algorithms can also be interpreted as diag onally rescaled gradient descent where the rescaling factor is optimally chosen to ensure convergence Spike triggered averaging techniques are effective for linear characterization of neural responses But neurons exhibit important nonlinear behaviors such as gain control that are not captured by such analyses We describe a spike triggered covariance method for retrievi'"
586599
]
587600
},
588-
"execution_count": 53,
601+
"execution_count": 25,
589602
"metadata": {},
590603
"output_type": "execute_result"
591604
}
@@ -615,7 +628,7 @@
615628
},
616629
{
617630
"cell_type": "code",
618-
"execution_count": 59,
631+
"execution_count": 26,
619632
"metadata": {},
620633
"outputs": [
621634
{
@@ -625,7 +638,7 @@
625638
" 'Spike triggered averaging technique effective linear characterization neural response But neuron exhibit important nonlinear behavior gain control captured analysis We describe spike triggered covariance method retrieving suppressive component gain control signal neuron We demonstrate method simulation retinal ganglion cell data Analysis physiological data reveals significant suppressive ax explains neural nonlinearities This method applicable sensory area modality']"
626639
]
627640
},
628-
"execution_count": 59,
641+
"execution_count": 26,
629642
"metadata": {},
630643
"output_type": "execute_result"
631644
}
@@ -663,7 +676,7 @@
663676
},
664677
{
665678
"cell_type": "code",
666-
"execution_count": 60,
679+
"execution_count": 27,
667680
"metadata": {},
668681
"outputs": [],
669682
"source": [
@@ -672,17 +685,17 @@
672685
},
673686
{
674687
"cell_type": "code",
675-
"execution_count": 63,
688+
"execution_count": 28,
676689
"metadata": {},
677690
"outputs": [
678691
{
679692
"data": {
680693
"text/plain": [
681-
"<3924x10000 sparse matrix of type '<class 'numpy.int64'>'\n",
682-
"\twith 265145 stored elements in Compressed Sparse Row format>"
694+
"<413x4534 sparse matrix of type '<class 'numpy.int64'>'\n",
695+
"\twith 26681 stored elements in Compressed Sparse Row format>"
683696
]
684697
},
685-
"execution_count": 63,
698+
"execution_count": 28,
686699
"metadata": {},
687700
"output_type": "execute_result"
688701
}
@@ -696,7 +709,7 @@
696709
},
697710
{
698711
"cell_type": "code",
699-
"execution_count": 64,
712+
"execution_count": 29,
700713
"metadata": {},
701714
"outputs": [],
702715
"source": [
@@ -705,7 +718,7 @@
705718
},
706719
{
707720
"cell_type": "code",
708-
"execution_count": 65,
721+
"execution_count": 30,
709722
"metadata": {},
710723
"outputs": [],
711724
"source": [
@@ -714,35 +727,35 @@
714727
},
715728
{
716729
"cell_type": "code",
717-
"execution_count": 67,
730+
"execution_count": 31,
718731
"metadata": {},
719732
"outputs": [
720733
{
721734
"data": {
722735
"text/plain": [
723-
"[('model', 5023),\n",
724-
" ('algorithm', 4364),\n",
725-
" ('learning', 3604),\n",
726-
" ('method', 3328),\n",
727-
" ('problem', 3317),\n",
728-
" ('data', 3150),\n",
729-
" ('show', 2418),\n",
730-
" ('approach', 2006),\n",
731-
" ('function', 1953),\n",
732-
" ('based', 1854),\n",
733-
" ('network', 1785),\n",
734-
" ('result', 1741),\n",
735-
" ('time', 1486),\n",
736-
" ('paper', 1459),\n",
737-
" ('using', 1455),\n",
738-
" ('task', 1402),\n",
739-
" ('distribution', 1352),\n",
740-
" ('propose', 1351),\n",
741-
" ('state', 1277),\n",
742-
" ('feature', 1248)]"
736+
"[('model', 485),\n",
737+
" ('algorithm', 413),\n",
738+
" ('learning', 398),\n",
739+
" ('data', 356),\n",
740+
" ('method', 338),\n",
741+
" ('problem', 304),\n",
742+
" ('show', 234),\n",
743+
" ('based', 234),\n",
744+
" ('approach', 206),\n",
745+
" ('function', 185),\n",
746+
" ('using', 167),\n",
747+
" ('result', 164),\n",
748+
" ('paper', 149),\n",
749+
" ('task', 142),\n",
750+
" ('image', 141),\n",
751+
" ('set', 134),\n",
752+
" ('feature', 133),\n",
753+
" ('kernel', 130),\n",
754+
" ('present', 129),\n",
755+
" ('new', 126)]"
743756
]
744757
},
745-
"execution_count": 67,
758+
"execution_count": 31,
746759
"metadata": {},
747760
"output_type": "execute_result"
748761
}

0 commit comments

Comments
 (0)