@@ -97,22 +97,24 @@ def build_faiss(model, name):
97
97
c = 2000 #collection.find({}, projection={'title': True, 'description': True, "content.text": True}).count()
98
98
encoder = None
99
99
index = None
100
+ # idmap = None
100
101
if hasattr (model , 'encode' ):
101
102
encoder = lambda x : model .encode (x ).astype ("float32" )
102
103
else :
103
104
encoder = lambda x :model .transform (x ).toarray ().astype ("float32" )
104
105
i = 0
105
- docs = []
106
106
ids = []
107
107
while i < c :
108
108
print (i )
109
+ docs = []
109
110
for x in collection .find ({}, projection = {'_id' : True , 'title' : True , 'description' : True , "content.text" : True }).skip (i ).limit (500 ):
110
111
docs .append (x .get ("title" ,"" ) + " " + x .get ('description' ,"" )+ " " + " " .join (filter (None ,x .get ('content' ,{}).get ('text' ,[]))))
111
112
ids .append (x ['_id' ])
113
+ print ('docs' ,len (docs ))
112
114
embeddings = encoder (docs )
113
115
if i == 0 :
114
116
index = faiss .IndexFlatIP (embeddings .shape [1 ])
115
- i += len ( embeddings )
117
+ # idmap = faiss.IndexIDMap(index )
116
118
117
119
118
120
# embeddings = np.array([embedding for embedding in embeddings]).astype("float32")
@@ -124,7 +126,11 @@ def build_faiss(model, name):
124
126
# Step 3: Pass the index to IndexIDMap
125
127
# index = faiss.IndexIDMap(index)
126
128
# Step 4: Add vectors and their IDs
127
- index .add_with_ids (embeddings ,list (range (i ,len (embeddings ))))
129
+ print ("range" ,len (np .arange (i ,i + len (embeddings ))))
130
+ print ("embeds" ,len (embeddings ))
131
+ # idmap.add_with_ids(embeddings,np.arange(i,i+len(embeddings)))
132
+ index .add (embeddings )
133
+ i += len (embeddings )
128
134
faiss .write_index (index ,f"models/{ name } .index" )
129
135
dump (ids ,'models/ids.joblib' )
130
136
print (f"Completed { name } index." )
0 commit comments