# Extract_Features.py
import os
import pickle

import numpy as np
import torch
from torch.utils.data import DataLoader

from models import MyDataset
from models.ccnet import ccnet


def getFileNames(txt_file_path):
    """Read a text file of image paths and use each file-name stem
    (the part of the name before the extension) as the label."""
    file_paths = []
    labels = []
    with open(txt_file_path, 'r') as file:
        for line in file:
            path = line.strip().split(' ')[0]  # First token on the line is the image path
            # The file-name stem (e.g. '00042' for '.../00042.jpg') is the label
            label = os.path.splitext(os.path.basename(path))[0]
            file_paths.append(path)
            labels.append(label)
    return file_paths, labels
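
# Example (hypothetical paths): a line such as
#   ./data/session1/00042.jpg 3
# yields file_paths == ['./data/session1/00042.jpg'] and labels == ['00042'];
# note that the label is the file-name stem, not the trailing integer.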


def extract_features_and_save(model, train_set_file, feature_save_path, batch_size=1):
    """
    Extract feature vectors for all images in the dataset and save them.

    Parameters:
    - model: the trained model
    - train_set_file: path to the text file listing the training-set image paths
    - feature_save_path: directory in which to save the extracted feature vectors
    - batch_size: batch size for the DataLoader; keep it at 1, since the
      flatten below would merge all features in a larger batch into one vector
    """
    # Build a DataLoader over the training set; MyDataset handles the image loading
    trainset = MyDataset(txt=train_set_file, transforms=None, train=True, imside=128, outchannels=1)
    data_loader = DataLoader(dataset=trainset, batch_size=batch_size, num_workers=12, shuffle=False)

    features = []
    labels = []

    model.eval()  # Set evaluation mode once, before iterating
    for batch_id, (datas, target) in enumerate(data_loader):
        data = datas[0].cuda()  # MyDataset yields a pair of image tensors; use the first, on the GPU
        target = target.cuda()

        # Extract the feature code without tracking gradients
        with torch.no_grad():
            feature = model.getFeatureCode(data)
        feature = feature.cpu().numpy().flatten()  # Move to CPU and flatten to a 1-D numpy array

        features.append(feature)
        labels.append(target.cpu().numpy())
        print(f"Extracted feature for batch {batch_id + 1}/{len(data_loader)}")

    # Stack the per-image features and concatenate the labels into numpy arrays
    features = np.array(features)
    labels = np.concatenate(labels)

    # Make sure the save directory exists
    os.makedirs(feature_save_path, exist_ok=True)

    # Save the features and labels as pickle files
    with open(os.path.join(feature_save_path, 'features.pkl'), 'wb') as f:
        pickle.dump(features, f)
    with open(os.path.join(feature_save_path, 'labels.pkl'), 'wb') as f:
        pickle.dump(labels, f)
    print(f"Feature vectors saved to {feature_save_path}")


# Example usage
if __name__ == "__main__":
    # Initialize the ccnet model; adjust the arguments to match your training setup
    model = ccnet(num_classes=767, weight=0.8)
    model.load_state_dict(torch.load('newmodel.pth'))  # Load the pre-trained weights
    model.cuda()  # Move the model to the GPU (a CUDA device is required by this script)

    # Text file listing the image paths, and the directory for the saved features
    train_set_file = "paths.txt"
    feature_save_path = "./features"

    # Extract features and save them
    extract_features_and_save(model, train_set_file, feature_save_path)
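

# A minimal matching sketch, assuming the extracted vectors compare well under
# cosine similarity; CCNet-style feature codes are often matched with an
# angular distance instead, so treat this purely as an illustration.
def match_score(feature_a, feature_b):
    """Cosine similarity between two extracted feature vectors (illustrative only)."""
    a = feature_a / (np.linalg.norm(feature_a) + 1e-12)
    b = feature_b / (np.linalg.norm(feature_b) + 1e-12)
    return float(np.dot(a, b))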