Skip to content

Commit fab9bd8

Browse files
committed
图片上传到新建的 dataset_collection_images 表,逻辑跟随更改
1 parent 8bd33d3 commit fab9bd8

File tree

37 files changed

+1857
-1422
lines changed

37 files changed

+1857
-1422
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
 * Shape of a stored dataset image record.
 *
 * All identifiers are typed as plain strings here.
 */
export interface DatasetCollectionImageSchema {
  // --- identity / ownership ---
  _id: string;
  teamId: string;
  datasetId: string;
  /** Optional: a record may exist without being linked to a collection. */
  collectionId?: string;

  // --- lifecycle timestamps ---
  createTime: Date;
  /** NOTE(review): presumably the moment after which the record may be purged — confirm against the storage schema. */
  expiredTime: Date;

  // --- file description ---
  size: number;
  name: string;
  path: string;
  contentType: string;

  /** Free-form extra attributes; no structure is enforced by this type. */
  metadata?: Record<string, any>;
}
14+
15+
/**
 * API request parameter type for uploading a dataset image.
 */
export interface UploadDatasetImageProps {
  /** Dataset the image is uploaded to. */
  datasetId: string;
  /** Optional collection to associate the image with. */
  collectionId?: string;
}

packages/service/core/ai/model.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@ export const getVlmModel = (model?: string) => {
2020
?.find((item) => item.model === model || item.name === model);
2121
};
2222

23+
/**
 * List every registered LLM model whose `vision` flag is truthy.
 *
 * Reads the process-wide `global.llmModelMap` registry.
 *
 * @returns The vision-capable models in registry iteration order, or an empty
 *          array when none are registered or the registry does not exist yet.
 */
export const getVlmModelList = () => {
  // `Array.from` never yields null/undefined, so an optional chain or `|| []`
  // placed after it cannot protect anything. The only thing that can be
  // missing is the registry itself, so that is what gets guarded.
  const registry = global.llmModelMap;
  if (!registry) return [];

  return Array.from(registry.values()).filter((item) => item.vision);
};
26+
27+
/**
 * Report whether at least one vision-capable model is available.
 *
 * @returns `true` when `getVlmModelList()` yields a non-empty list.
 */
export const hasAvailableVlmModel = () => getVlmModelList().length > 0;
31+
2332
export const getDefaultEmbeddingModel = () => global?.systemDefaultModel.embedding!;
2433
export const getEmbeddingModel = (model?: string) => {
2534
if (!model) return getDefaultEmbeddingModel();

packages/service/core/dataset/collection/controller.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import {
3838
getLLMMaxChunkSize
3939
} from '@fastgpt/global/core/dataset/training/utils';
4040
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
41+
import { MongoDatasetCollectionImage } from '../schema';
4142

4243
export const createCollectionAndInsertData = async ({
4344
dataset,
@@ -403,7 +404,12 @@ export async function delCollection({
403404
]
404405
: []),
405406
// Delete vector data
406-
deleteDatasetDataVector({ teamId, datasetIds, collectionIds })
407+
deleteDatasetDataVector({ teamId, datasetIds, collectionIds }),
408+
// Delete collection images
409+
MongoDatasetCollectionImage.deleteMany({
410+
teamId,
411+
collectionId: { $in: collectionIds }
412+
})
407413
]);
408414

409415
// delete collections

packages/service/core/dataset/collection/controller_imageFileId.ts

Lines changed: 42 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import type {
1313
import { MongoDatasetTraining } from '../training/schema';
1414
import { MongoDatasetData } from '../data/schema';
1515
import { delImgByRelatedId } from '../../../common/file/image/controller';
16-
// import { deleteDatasetDataVector } from '../../../common/vectorStore/controller';
1716
import { deleteDatasetDataVector } from '../../../common/vectorDB/controller';
1817
import { delFileByFileIdList } from '../../../common/file/gridfs/controller';
1918
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
@@ -38,6 +37,8 @@ import {
3837
computeChunkSplitter,
3938
getLLMMaxChunkSize
4039
} from '@fastgpt/global/core/dataset/training/utils';
40+
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
41+
import { MongoDatasetCollectionImage } from '../schema';
4142

4243
export const createCollectionAndInsertData = async ({
4344
dataset,
@@ -84,8 +85,7 @@ export const createCollectionAndInsertData = async ({
8485
chunkSize,
8586
maxSize: getLLMMaxChunkSize(getLLMModel(dataset.agentModel)),
8687
overlapRatio: trainingType === DatasetCollectionDataProcessModeEnum.chunk ? 0.2 : 0,
87-
customReg: chunkSplitter ? [chunkSplitter] : [],
88-
isQAImport
88+
customReg: chunkSplitter ? [chunkSplitter] : []
8989
});
9090

9191
// 2. auth limit
@@ -102,8 +102,7 @@ export const createCollectionAndInsertData = async ({
102102
});
103103

104104
const fn = async (session: ClientSession) => {
105-
// 3. create collection,拿着传入的collectionId
106-
105+
// 3. Use the passed collectionId and do not create a new collection.
107106
// 4. create training bill
108107
const traingBillId = await (async () => {
109108
if (billId) return billId;
@@ -120,19 +119,26 @@ export const createCollectionAndInsertData = async ({
120119
return newBillId;
121120
})();
122121

123-
// 5. insert to training queue
124-
console.log(`[Image parse queue] Pushing data to training queue:`, {
125-
datasetId: dataset._id,
126-
collectionId: collectionId || '',
127-
mode: getTrainingModeByCollection({
128-
trainingType: trainingType,
129-
autoIndexes: createCollectionParams.autoIndexes,
130-
imageIndex: createCollectionParams.imageIndex,
131-
isImageCollection: createCollectionParams.metadata?.isImageCollection === true
132-
}),
133-
chunksCount: chunks.length
134-
});
122+
// 5. Update the collectionId field in the image record
123+
if (createCollectionParams.metadata?.isImageCollection && createCollectionParams.fileId) {
124+
await MongoDatasetCollectionImage.updateOne(
125+
{
126+
_id: createCollectionParams.fileId,
127+
teamId: teamId
128+
},
129+
{
130+
$set: {
131+
collectionId: collectionId
132+
},
133+
$unset: {
134+
expiredTime: 1
135+
}
136+
},
137+
{ session }
138+
);
139+
}
135140

141+
// 6. insert to training queue
136142
const insertResults = await pushDataListToTrainingQueue({
137143
teamId,
138144
tmbId,
@@ -152,6 +158,10 @@ export const createCollectionAndInsertData = async ({
152158
billId: traingBillId,
153159
data: chunks.map((item, index) => ({
154160
...item,
161+
indexes: item.indexes?.map((text) => ({
162+
type: DatasetDataIndexTypeEnum.custom,
163+
text
164+
})),
155165
chunkIndex: index
156166
})),
157167
session
@@ -177,7 +187,7 @@ export const createCollectionAndInsertData = async ({
177187
}
178188

179189
return {
180-
collectionId,
190+
collectionId: collectionId,
181191
insertResults
182192
};
183193
};
@@ -382,7 +392,12 @@ export async function delCollection({
382392
]
383393
: []),
384394
// Delete vector data
385-
deleteDatasetDataVector({ teamId, datasetIds, collectionIds })
395+
deleteDatasetDataVector({ teamId, datasetIds, collectionIds }),
396+
// Delete collection images
397+
MongoDatasetCollectionImage.deleteMany({
398+
teamId,
399+
collectionId: { $in: collectionIds }
400+
})
386401
]);
387402

388403
// delete collections
@@ -413,16 +428,20 @@ export async function pushImageFileToTrainingQueue({
413428
billId?: string;
414429
model?: string;
415430
}) {
431+
const mongoose = require('mongoose');
432+
const ObjectId = mongoose.Types.ObjectId;
433+
416434
await MongoDatasetTraining.create({
417-
teamId,
418-
tmbId,
419-
datasetId,
420-
collectionId,
435+
teamId: new ObjectId(teamId),
436+
tmbId: new ObjectId(tmbId),
437+
datasetId: new ObjectId(datasetId),
438+
collectionId: new ObjectId(collectionId),
421439
billId,
422440
mode: TrainingModeEnum.imageParse,
423441
model,
424442
imageFileId,
425443
retryCount: 5,
444+
lockTime: new Date('2000/1/1'),
426445
indexes: []
427446
});
428447
}

packages/service/core/dataset/controller.ts

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { type DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
22
import { MongoDatasetCollection } from './collection/schema';
3-
import { MongoDataset } from './schema';
3+
import { MongoDataset, MongoDatasetCollectionImage } from './schema';
44
import { delCollectionRelatedSource } from './collection/controller';
55
import { type ClientSession } from '../../common/mongo';
66
import { MongoDatasetTraining } from './training/schema';
@@ -9,6 +9,7 @@ import { deleteDatasetDataVector } from '../../common/vectorDB/controller';
99
import { MongoDatasetDataText } from './data/dataTextSchema';
1010
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
1111
import { retryFn } from '@fastgpt/global/common/system/utils';
12+
import { addMinutes } from 'date-fns';
1213

1314
/* ============= dataset ========== */
1415
/* find all datasetId by top datasetId */
@@ -105,7 +106,12 @@ export async function delDatasetRelevantData({
105106
// Delete Image and file
106107
delCollectionRelatedSource({ collections }),
107108
// Delete vector data
108-
deleteDatasetDataVector({ teamId, datasetIds })
109+
deleteDatasetDataVector({ teamId, datasetIds }),
110+
// Delete dataset collection images
111+
MongoDatasetCollectionImage.deleteMany({
112+
teamId,
113+
datasetId: { $in: datasetIds }
114+
})
109115
]);
110116
});
111117

@@ -115,3 +121,57 @@ export async function delDatasetRelevantData({
115121
datasetId: { $in: datasetIds }
116122
}).session(session);
117123
}
124+
125+
/* ============= dataset images ========== */
126+
127+
/**
 * Persist a new image record through `MongoDatasetCollectionImage`.
 *
 * Ids are coerced to strings before being written. When no `collectionId` is
 * supplied the field is stored as `null`. Every record is created with an
 * `expiredTime` 30 minutes after the moment of the call.
 * NOTE(review): presumably a TTL index removes the record after that time
 * unless the field is cleared later — confirm against the model's schema.
 *
 * @param teamId       Owning team id.
 * @param datasetId    Dataset the image belongs to.
 * @param collectionId Optional collection the image is already linked to.
 * @param name         File name.
 * @param path         Storage path of the file.
 * @param contentType  MIME type of the file.
 * @param size         File size.
 * @param metadata     Extra attributes; defaults to an empty object.
 * @returns The `_id` of the created record, converted to a string.
 */
export async function createDatasetImage({
  teamId,
  datasetId,
  collectionId,
  name,
  path,
  contentType,
  size,
  metadata = {}
}: {
  teamId: string;
  datasetId: string;
  collectionId?: string;
  name: string;
  path: string;
  contentType: string;
  size: number;
  metadata?: Record<string, any>;
}): Promise<string> {
  // Set TTL to 30min
  const ttlDeadline = addMinutes(new Date(), 30);
  const linkedCollectionId = collectionId ? String(collectionId) : null;

  const { _id: createdId } = await MongoDatasetCollectionImage.create({
    teamId: String(teamId),
    datasetId: String(datasetId),
    collectionId: linkedCollectionId,
    name,
    path,
    contentType,
    size,
    metadata,
    createTime: new Date(),
    expiredTime: ttlDeadline
  });

  return String(createdId);
}
164+
165+
/**
 * Look up a dataset image record by id.
 *
 * This is a best-effort lookup: it never throws. An empty id, an id whose
 * length is not 24, and any error raised during the query all produce `null`.
 *
 * @param imageId Id of the image record.
 * @returns The lean record returned by `findById`, or `null` as described above.
 */
export async function getDatasetImage(imageId: string): Promise<any> {
  try {
    // Only ids that are non-empty and exactly 24 characters long are queried.
    const hasExpectedLength = !!imageId && imageId.length === 24;

    return hasExpectedLength
      ? await MongoDatasetCollectionImage.findById(imageId).lean()
      : null;
  } catch (_error) {
    // Deliberately swallowed: callers only ever see "found" or `null`.
    return null;
  }
}

packages/service/core/dataset/schema.ts

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import {
1313
TeamMemberCollectionName
1414
} from '@fastgpt/global/support/user/team/constant';
1515
import type { DatasetSchemaType } from '@fastgpt/global/core/dataset/type.d';
16+
import type { DatasetCollectionImageSchema } from '@fastgpt/global/core/dataset/imageCollection';
17+
import mongoose from 'mongoose';
1618

1719
export const DatasetCollectionName = 'datasets';
1820

@@ -146,3 +148,69 @@ try {
146148
}
147149

148150
export const MongoDataset = getMongoModel<DatasetSchemaType>(DatasetCollectionName, DatasetSchema);
151+
152+
export const DatasetCollectionImageCollectionName = 'dataset_collection_images';

// Register the model on the imported mongoose instance only once; calling
// `mongoose.model(name, schema)` again for an existing name would throw.
if (!mongoose.modelNames().includes(DatasetCollectionImageCollectionName)) {
  // Named `imageSchema` so it does not shadow the imported
  // `DatasetCollectionImageSchema` type.
  const imageSchema = new Schema({
    teamId: {
      type: String,
      required: true
    },
    datasetId: {
      type: String,
      required: true
    },
    // Stays null while the image is not linked to a collection.
    collectionId: {
      type: String,
      required: false,
      default: null
    },
    name: {
      type: String,
      required: true
    },
    path: {
      type: String,
      required: true
    },
    contentType: {
      type: String,
      required: true
    },
    size: {
      type: Number,
      required: true
    },
    metadata: {
      type: Object,
      default: {}
    },
    createTime: {
      type: Date,
      default: Date.now
    },
    // The TTL index for this field is declared once, below. Declaring it here
    // as well makes Mongoose warn about a duplicate schema index.
    expiredTime: {
      type: Date,
      required: true
    }
  });

  // TTL index: with `expireAfterSeconds: 0` MongoDB removes a document once
  // the clock time stored in `expiredTime` has passed. Documents without the
  // field are not expired.
  imageSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 0 });

  // Lookup / bulk-delete indexes.
  imageSchema.index({ teamId: 1 });
  imageSchema.index({ datasetId: 1 });
  imageSchema.index({ collectionId: 1 });

  mongoose.model(
    DatasetCollectionImageCollectionName,
    imageSchema,
    DatasetCollectionImageCollectionName
  );
}

/** Model for dataset image records, built from the schema registered above. */
export const MongoDatasetCollectionImage = getMongoModel<DatasetCollectionImageSchema>(
  DatasetCollectionImageCollectionName,
  mongoose.model(DatasetCollectionImageCollectionName).schema
);

0 commit comments

Comments
 (0)