-
Notifications
You must be signed in to change notification settings - Fork 0
/
NLP_token.py
35 lines (26 loc) · 933 Bytes
/
NLP_token.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
# Download NLTK resources if not already downloaded.
# "punkt" is the tokenizer model used by older NLTK releases; recent releases
# make word_tokenize() load "punkt_tab" instead and raise LookupError when it
# is missing, so fetch both to keep the script working across versions.
nltk.download("punkt")
nltk.download("punkt_tab")
nltk.download("stopwords")

# Sample text for processing
text = "Natural language processing (NLP) is a subfield of artificial intelligence (AI) that deals with the interaction between computers and humans through natural language."

# Tokenization: split the sentence into word and punctuation tokens.
tokens = word_tokenize(text)

# Stopwords removal: build the set once for fast membership tests, and
# lower-case each token before the lookup so capitalized stopwords match too.
# Punctuation tokens are not stopwords and are therefore kept.
stop_words = set(stopwords.words("english"))
filtered_tokens = [word for word in tokens if word.lower() not in stop_words]

# Stemming: reduce every remaining token to its Porter stem.
stemmer = PorterStemmer()
stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]

# Print the results of each pipeline stage.
print("Original Text:")
print(text)
print("\nTokenized Text:")
print(tokens)
print("\nAfter Stopwords Removal:")
print(filtered_tokens)
print("\nAfter Stemming:")
print(stemmed_tokens)