You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import gensim
from gensim.models import FastText
from sklearn.feature_extraction.text import TfidfVectorizer
from difflib import SequenceMatcher
# Read the query-text CSV that is used as the corpus.
query_text_csv = 'C:/Demo/Query_Text.csv'
qt = pd.read_csv(query_text_csv)
# Bare expression: echoes (rows, columns) in an interactive session,
# has no effect when run as a script.
qt.shape
When I execute the code below, I get the following error from TfidfVectorizer:
ValueError: empty vocabulary; perhaps the documents only contain stop words
# Build a TF-IDF matrix for two consecutive queries (rows i and i+1 of convid).
vectorizer = TfidfVectorizer(
    decode_error='ignore',
    strip_accents='unicode',
    stop_words='english',
    min_df=1,
    analyzer='word',
)
query_pair = [
    str(convid['Query_Text'][i]).lower(),
    str(convid['Query_Text'][i + 1]).lower(),
]
try:
    tfidf = vectorizer.fit_transform(query_pair)
except ValueError:
    # fit_transform raises "ValueError: empty vocabulary" when no token
    # survives in EITHER document: every word is an English stop word, or is
    # dropped by the tokenizer (the default word pattern only keeps tokens of
    # two or more alphanumeric characters). Such a pair has nothing to
    # compare, so mark it as missing instead of aborting the whole run.
    # NOTE(review): any code that consumes `tfidf` must handle None.
    tfidf = None
Python Code:
"""
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import gensim
from gensim.models import FastText
from sklearn.feature_extraction.text import TfidfVectorizer
from difflib import SequenceMatcher
# Loading Query Text for Corpus Building
qt = pd.read_csv('C:/Demo/Query_Text.csv')
qt.shape  # no-op outside an interactive session (echoes rows, columns)

# Loading QueryText for comparing
convid = pd.read_csv('C:/Demo/ConvId_May06.csv')
convid.shape  # no-op outside an interactive session

sentences = qt

# Order each user's queries chronologically so that consecutive rows can be
# compared, then renumber the rows 0..n-1 so positional labels i / i+1 work.
convid = convid.sort_values(['User_PUID', 'EventInfo_Time'], ascending=[True, True])
convid = convid.reset_index()

# Result columns, initialised to 0.0 (float() == 0.0) for every row.
convid['FastTextResult'] = float()
convid['Tfidf'] = float()
convid['TfidfWc'] = float()

# NOTE(review): gensim's FastText expects an iterable of token lists as its
# first argument; `qt` is a whole DataFrame here, and iterating a DataFrame
# yields its column labels rather than its rows. Presumably a tokenised text
# column of `qt` was intended -- confirm before relying on this model.
model_ted = FastText(qt, window=1, min_count=1, sg=0)

# The loop body had lost its indentation in the pasted script (which makes it
# an IndentationError); the structure below restores it.
for i in range(len(convid['Query_Text'])):
    print('i', i)
    # Stop at the last row: there is no row i+1 left to pair it with.
    if i == len(convid) - 1:
        break

# NOTE(review): placed after the loop on the assumption that the output is
# written once, when all rows have been processed -- confirm.
convid.to_csv('C:/Demo/ConvIdOutput_May061.csv')
The text was updated successfully, but these errors were encountered: