1+ {
2+ "nbformat" : 4 ,
3+ "nbformat_minor" : 0 ,
4+ "metadata" : {
5+ "colab" : {
6+ "provenance" : []
7+ },
8+ "kernelspec" : {
9+ "name" : " python3" ,
10+ "display_name" : " Python 3"
11+ },
12+ "language_info" : {
13+ "name" : " python"
14+ }
15+ },
16+ "cells" : [
17+ {
18+ "cell_type" : " markdown" ,
19+ "source" : [
20+ " # Libraries"
21+ ],
22+ "metadata" : {
23+ "id" : " XmjSOfm5C7Y3"
24+ }
25+ },
26+ {
27+ "cell_type" : " code" ,
28+ "execution_count" : null ,
29+ "metadata" : {
30+ "colab" : {
31+ "base_uri" : " https://localhost:8080/"
32+ },
33+ "id" : " 3syypoOe4SZ0" ,
34+ "outputId" : " b319cd48-1f8c-46aa-8e76-721f90fb13b9"
35+ },
36+ "outputs" : [
37+ {
38+ "output_type" : " stream" ,
39+ "name" : " stdout" ,
40+ "text" : [
41+ " Requirement already satisfied: ktrain in /usr/local/lib/python3.10/dist-packages (0.37.6)\n " ,
42+ " Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.2.2)\n " ,
43+ " Requirement already satisfied: matplotlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from ktrain) (3.7.1)\n " ,
44+ " Requirement already satisfied: pandas>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.5.3)\n " ,
45+ " Requirement already satisfied: fastprogress>=0.1.21 in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.0.3)\n " ,
46+ " Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.27.1)\n " ,
47+ " Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.3.1)\n " ,
48+ " Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from ktrain) (23.1)\n " ,
49+ " Requirement already satisfied: langdetect in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.0.9)\n " ,
50+ " Requirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (from ktrain) (0.42.1)\n " ,
51+ " Requirement already satisfied: cchardet in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.1.7)\n " ,
52+ " Requirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (from ktrain) (4.0.0)\n " ,
53+ " Requirement already satisfied: syntok>1.3.3 in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.4.4)\n " ,
54+ " Requirement already satisfied: tika in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.6.0)\n " ,
55+ " Requirement already satisfied: transformers>=4.17.0 in /usr/local/lib/python3.10/dist-packages (from ktrain) (4.31.0)\n " ,
56+ " Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from ktrain) (0.1.99)\n " ,
57+ " Requirement already satisfied: keras-bert>=0.86.0 in /usr/local/lib/python3.10/dist-packages (from ktrain) (0.89.0)\n " ,
58+ " Requirement already satisfied: whoosh in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.7.4)\n " ,
59+ " Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from keras-bert>=0.86.0->ktrain) (1.22.4)\n " ,
60+ " Requirement already satisfied: keras-transformer==0.40.0 in /usr/local/lib/python3.10/dist-packages (from keras-bert>=0.86.0->ktrain) (0.40.0)\n " ,
61+ " Requirement already satisfied: keras-pos-embd==0.13.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.13.0)\n " ,
62+ " Requirement already satisfied: keras-multi-head==0.29.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.29.0)\n " ,
63+ " Requirement already satisfied: keras-layer-normalization==0.16.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.16.0)\n " ,
64+ " Requirement already satisfied: keras-position-wise-feed-forward==0.8.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.8.0)\n " ,
65+ " Requirement already satisfied: keras-embed-sim==0.10.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.10.0)\n " ,
66+ " Requirement already satisfied: keras-self-attention==0.51.0 in /usr/local/lib/python3.10/dist-packages (from keras-multi-head==0.29.0->keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.51.0)\n " ,
67+ " Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (1.1.0)\n " ,
68+ " Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (0.11.0)\n " ,
69+ " Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (4.41.0)\n " ,
70+ " Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (1.4.4)\n " ,
71+ " Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (8.4.0)\n " ,
72+ " Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (3.1.0)\n " ,
73+ " Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (2.8.2)\n " ,
74+ " Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.0.1->ktrain) (2022.7.1)\n " ,
75+ " Requirement already satisfied: regex>2016 in /usr/local/lib/python3.10/dist-packages (from syntok>1.3.3->ktrain) (2022.10.31)\n " ,
76+ " Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (3.12.2)\n " ,
77+ " Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (0.16.4)\n " ,
78+ " Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (6.0.1)\n " ,
79+ " Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (0.13.3)\n " ,
80+ " Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (0.3.1)\n " ,
81+ " Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (4.65.0)\n " ,
82+ " Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from langdetect->ktrain) (1.16.0)\n " ,
83+ " Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (1.26.16)\n " ,
84+ " Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (2023.5.7)\n " ,
85+ " Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (2.0.12)\n " ,
86+ " Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (3.4)\n " ,
87+ " Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->ktrain) (1.10.1)\n " ,
88+ " Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->ktrain) (3.2.0)\n " ,
89+ " Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from tika->ktrain) (67.7.2)\n " ,
90+ " Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers>=4.17.0->ktrain) (2023.6.0)\n " ,
91+ " Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers>=4.17.0->ktrain) (4.7.1)\n "
92+ ]
93+ }
94+ ],
95+ "source" : [
96+ " %pip install ktrain==0.37.6"
97+ ]
98+ },
99+ {
100+ "cell_type" : " code" ,
101+ "source" : [
102+ " import os.path\n " ,
103+ " import numpy as np\n " ,
104+ " import ktrain\n " ,
105+ " from ktrain import text\n " ,
106+ " import tensorflow"
107+ ],
108+ "metadata" : {
109+ "id" : " 0ZejN0MU6dnb"
110+ },
111+ "execution_count" : null ,
112+ "outputs" : []
113+ },
114+ {
115+ "cell_type" : " markdown" ,
116+ "source" : [
117+ " # Dataset"
118+ ],
119+ "metadata" : {
120+ "id" : " oSJh43dYC_I4"
121+ }
122+ },
123+ {
124+ "cell_type" : " code" ,
125+ "source" : [
126+ " data=tensorflow.keras.utils.get_file(fname=\" aclImdb_v1.tar.gz\" ,origin=\" https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\" ,extract=True)"
127+ ],
128+ "metadata" : {
129+ "id" : " navTD1Nu7NMH"
130+ },
131+ "execution_count" : null ,
132+ "outputs" : []
133+ },
134+ {
135+ "cell_type" : " code" ,
136+ "source" : [
137+ " data_dir=os.path.join(os.path.dirname(data),\" aclImdb\" )"
138+ ],
139+ "metadata" : {
140+ "id" : " DJD9_h829wMX"
141+ },
142+ "execution_count" : null ,
143+ "outputs" : []
144+ },
145+ {
146+ "cell_type" : " code" ,
147+ "source" : [
148+ " (x_train,y_train),(x_test,y_test),preproc=text.texts_from_folder(datadir=data_dir,classes=[\" pos\" ,\" neg\" ],train_test_names=[\" train\" ,\" test\" ],preprocess_mode=\" bert\" )"
149+ ],
150+ "metadata" : {
151+ "colab" : {
152+ "base_uri" : " https://localhost:8080/" ,
153+ "height" : 161
154+ },
155+ "id" : " M84oU3gM-1zZ" ,
156+ "outputId" : " 4cfe9061-cd3f-4d21-8826-c78853d4e090"
157+ },
158+ "execution_count" : null ,
159+ "outputs" : [
160+ {
161+ "output_type" : " stream" ,
162+ "name" : " stdout" ,
163+ "text" : [
164+ " detected encoding: utf-8\n " ,
165+ " preprocessing train...\n " ,
166+ " language: en\n "
167+ ]
168+ },
169+ {
170+ "output_type" : " display_data" ,
171+ "data" : {
172+ "text/plain" : [
173+ " <IPython.core.display.HTML object>"
174+ ],
175+ "text/html" : [
176+ " \n " ,
177+ " <style>\n " ,
178+ " /* Turns off some styling */\n " ,
179+ " progress {\n " ,
180+ " /* gets rid of default border in Firefox and Opera. */\n " ,
181+ " border: none;\n " ,
182+ " /* Needs to be in here for Safari polyfill so background images work as expected. */\n " ,
183+ " background-size: auto;\n " ,
184+ " }\n " ,
185+ " progress:not([value]), progress:not([value])::-webkit-progress-bar {\n " ,
186+ " background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n " ,
187+ " }\n " ,
188+ " .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n " ,
189+ " background: #F44336;\n " ,
190+ " }\n " ,
191+ " </style>\n "
192+ ]
193+ },
194+ "metadata" : {}
195+ },
196+ {
197+ "output_type" : " display_data" ,
198+ "data" : {
199+ "text/plain" : [
200+ " <IPython.core.display.HTML object>"
201+ ],
202+ "text/html" : [
203+ " done."
204+ ]
205+ },
206+ "metadata" : {}
207+ },
208+ {
209+ "output_type" : " stream" ,
210+ "name" : " stdout" ,
211+ "text" : [
212+ " Is Multi-Label? False\n " ,
213+ " preprocessing test...\n " ,
214+ " language: en\n "
215+ ]
216+ },
217+ {
218+ "output_type" : " display_data" ,
219+ "data" : {
220+ "text/plain" : [
221+ " <IPython.core.display.HTML object>"
222+ ],
223+ "text/html" : [
224+ " \n " ,
225+ " <style>\n " ,
226+ " /* Turns off some styling */\n " ,
227+ " progress {\n " ,
228+ " /* gets rid of default border in Firefox and Opera. */\n " ,
229+ " border: none;\n " ,
230+ " /* Needs to be in here for Safari polyfill so background images work as expected. */\n " ,
231+ " background-size: auto;\n " ,
232+ " }\n " ,
233+ " progress:not([value]), progress:not([value])::-webkit-progress-bar {\n " ,
234+ " background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n " ,
235+ " }\n " ,
236+ " .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n " ,
237+ " background: #F44336;\n " ,
238+ " }\n " ,
239+ " </style>\n "
240+ ]
241+ },
242+ "metadata" : {}
243+ },
244+ {
245+ "output_type" : " display_data" ,
246+ "data" : {
247+ "text/plain" : [
248+ " <IPython.core.display.HTML object>"
249+ ],
250+ "text/html" : [
251+ " done."
252+ ]
253+ },
254+ "metadata" : {}
255+ }
256+ ]
257+ },
258+ {
259+ "cell_type" : " markdown" ,
260+ "source" : [
261+ " # BERT Model (Bidirectional Encoder Representations from Transformers)"
262+ ],
263+ "metadata" : {
264+ "id" : " HsD1RIeyDDHi"
265+ }
266+ },
267+ {
268+ "cell_type" : " code" ,
269+ "source" : [
270+ " model=text.text_classifier(name=\" bert\" ,train_data=(x_train,y_train),preproc=preproc)"
271+ ],
272+ "metadata" : {
273+ "id" : " egXY63ExDBG9" ,
274+ "colab" : {
275+ "base_uri" : " https://localhost:8080/"
276+ },
277+ "outputId" : " 9fec6679-1aeb-4098-e9d4-57cb869765cd"
278+ },
279+ "execution_count" : null ,
280+ "outputs" : [
281+ {
282+ "output_type" : " stream" ,
283+ "name" : " stdout" ,
284+ "text" : [
285+ " Is Multi-Label? False\n " ,
286+ " maxlen is 400\n "
287+ ]
288+ },
289+ {
290+ "output_type" : " stream" ,
291+ "name" : " stderr" ,
292+ "text" : [
293+ " /usr/local/lib/python3.10/dist-packages/keras/initializers/initializers.py:120: UserWarning: The initializer GlorotNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n " ,
294+ " warnings.warn(\n "
295+ ]
296+ },
297+ {
298+ "output_type" : " stream" ,
299+ "name" : " stdout" ,
300+ "text" : [
301+ " done.\n "
302+ ]
303+ }
304+ ]
305+ },
306+ {
307+ "cell_type" : " code" ,
308+ "source" : [
309+ " a=ktrain.get_learner(model=model,train_data=(x_train,y_train),val_data=(x_test,y_test),batch_size=6)"
310+ ],
311+ "metadata" : {
312+ "id" : " ICtxz7LHaB1I" ,
313+ "colab" : {
314+ "base_uri" : " https://localhost:8080/"
315+ },
316+ "outputId" : " c3b1c676-3fab-4445-e227-975f6a015e16"
317+ },
318+ "execution_count" : null ,
319+ "outputs" : [
320+ {
321+ "output_type" : " stream" ,
322+ "name" : " stderr" ,
323+ "text" : [
324+ " /usr/local/lib/python3.10/dist-packages/ktrain/__init__.py:100: UserWarning: For a GPU with 12GB of RAM, the following maxima apply:\n " ,
325+ " sequence len=64, max_batch_size=64\n " ,
326+ " sequence len=128, max_batch_size=32\n " ,
327+ " sequence len=256, max_batch_size=16\n " ,
328+ " sequence len=320, max_batch_size=14\n " ,
329+ " sequence len=384, max_batch_size=12\n " ,
330+ " sequence len=512, max_batch_size=6\n " ,
331+ " \n " ,
332+ " You've exceeded these limits.\n " ,
333+ " If using a GPU with <=12GB of memory, you may run out of memory during training.\n " ,
334+ " If necessary, adjust sequence length or batch size based on above.\n " ,
335+ " I.warnings.warn(msg)\n "
336+ ]
337+ }
338+ ]
339+ },
340+ {
341+ "cell_type" : " code" ,
342+ "source" : [
343+ " a.fit_onecycle(lr=2e-5,epochs=1)"
344+ ],
345+ "metadata" : {
346+ "id" : " mAjZxMowbr_R" ,
347+ "colab" : {
348+ "base_uri" : " https://localhost:8080/" ,
349+ "height" : 171
350+ },
351+ "outputId" : " 47cc0abe-4083-4cd5-cc8d-6d1ee3e5cb31"
352+ },
353+ "execution_count" : null ,
354+ "outputs" : [
355+ {
356+ "output_type" : " error" ,
357+ "ename" : " NameError" ,
358+ "evalue" : " ignored" ,
359+ "traceback" : [
360+ " \u001b [0;31m---------------------------------------------------------------------------\u001b [0m" ,
361+ " \u001b [0;31mNameError\u001b [0m Traceback (most recent call last)" ,
362+ " \u001b [0;32m<ipython-input-1-3c959640d8b7>\u001b [0m in \u001b [0;36m<cell line: 1>\u001b [0;34m()\u001b [0m\n \u001b [0;32m----> 1\u001b [0;31m \u001b [0ma\u001b [0m\u001b [0;34m.\u001b [0m\u001b [0mfit_onecycle\u001b [0m\u001b [0;34m(\u001b [0m\u001b [0mlr\u001b [0m\u001b [0;34m=\u001b [0m\u001b [0;36m2e-5\u001b [0m\u001b [0;34m,\u001b [0m\u001b [0mepochs\u001b [0m\u001b [0;34m=\u001b [0m\u001b [0;36m1\u001b [0m\u001b [0;34m)\u001b [0m\u001b [0;34m\u001b [0m\u001b [0;34m\u001b [0m\u001b [0m\n \u001b [0m" ,
363+ " \u001b [0;31mNameError\u001b [0m: name 'a' is not defined"
364+ ]
365+ }
366+ ]
367+ }
368+ ]
369+ }
0 commit comments