Source code for wbia_cnn.process

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Helpers that resize raw project images and cache image / label data into
numpy ``.npy`` files (ids, X and y arrays) for later loading.
"""
from __future__ import absolute_import, division, print_function
from wbia.detecttools.directory import Directory
from os.path import join, abspath, exists, basename
import utool as ut
import numpy as np

# NOTE(review): utool injection -- rebinds the module-level ``print`` and
# provides ``rrr`` / ``profile``; presumably a module-aware print, a reload
# helper and a profiling decorator -- confirm against utool's inject2 docs.
(print, rrr, profile) = ut.inject2(__name__)


def process_image_directory(project_name, size, reset=True):
    """Resize every raw image of a project into the processed data folder.

    Images are read from ``../data/raw/`` + ``project_name`` and written,
    under the same file name, to ``../data/processed/`` + ``project_name``.
    Both locations are resolved against the current working directory.

    Args:
        project_name (str): name of the project sub-folder.
        size (tuple): target size handed to ``cv2.resize`` as its ``dsize``
            argument.
        reset (bool): when True, an already existing processed folder for
            the project is removed before processing starts.
    """
    import cv2

    # Raw folders
    raw_path = abspath(join('..', 'data', 'raw'))
    processed_path = abspath(join('..', 'data', 'processed'))

    # Project folders
    project_raw_path = join(raw_path, project_name)
    project_processed_path = join(processed_path, project_name)

    # Load raw data
    direct = Directory(project_raw_path, include_extensions='images')

    # Reset / create paths if not exist
    if exists(project_processed_path) and reset:
        ut.remove_dirs(project_processed_path)
    ut.ensuredir(project_processed_path)

    # Process by resizing the images into the desired shape
    for file_path in direct.files():
        file_name = basename(file_path)
        print('Processing %r' % (file_name,))
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread reports an unreadable / corrupt file by returning
            # None instead of raising; skip it rather than letting
            # cv2.resize abort the whole batch half-way through.
            print('Cannot read image...skipping')
            continue
        image = cv2.resize(image, size, interpolation=cv2.INTER_LANCZOS4)
        dest_path = join(project_processed_path, file_name)
        cv2.imwrite(dest_path, image)
def numpy_processed_directory(
    project_name,
    numpy_ids_file_name='ids.npy',
    numpy_x_file_name='X.npy',
    numpy_y_file_name='y.npy',
    labels_file_name='labels.csv',
    reset=True,
):
    """Cache a project's processed images and their labels as numpy files.

    Images come from ``../data/processed/`` + ``project_name`` and labels
    from the CSV file in ``../data/labels/`` + ``project_name``.  The ids,
    X and y arrays are saved into ``../data/numpy/`` + ``project_name``.
    All locations are resolved against the current working directory.

    Each non-blank line of the labels file is read as
    ``file_name,label``; only the first two comma-separated fields are used.
    Images without a label entry, and images that cannot be decoded, are
    skipped with a message.

    Args:
        project_name (str): name of the project sub-folder.
        numpy_ids_file_name (str): output file name for the image ids.
        numpy_x_file_name (str): output file name for the image data.
        numpy_y_file_name (str): output file name for the labels.
        labels_file_name (str): name of the labels CSV file.
        reset (bool): when True, an already existing numpy folder for the
            project is removed before the new files are written.
    """
    import cv2

    # Raw folders
    processed_path = abspath(join('..', 'data', 'processed'))
    labels_path = abspath(join('..', 'data', 'labels'))
    numpy_path = abspath(join('..', 'data', 'numpy'))

    # Project folders
    project_processed_path = join(processed_path, project_name)
    project_labels_path = join(labels_path, project_name)
    project_numpy_path = join(numpy_path, project_name)

    # Project files
    project_numpy_ids_file_name = join(project_numpy_path, numpy_ids_file_name)
    project_numpy_x_file_name = join(project_numpy_path, numpy_x_file_name)
    project_numpy_y_file_name = join(project_numpy_path, numpy_y_file_name)
    project_numpy_labels_file_name = join(project_labels_path, labels_file_name)

    # Load raw data
    direct = Directory(project_processed_path, include_extensions='images')
    label_dict = {}
    # Context manager so the labels file handle is always closed.
    with open(project_numpy_labels_file_name) as labels_file:
        for line in labels_file:
            line = line.strip()
            if not line:
                # Blank lines carry no label; indexing them would fail.
                continue
            fields = line.split(',')
            label_dict[fields[0].strip()] = fields[1].strip()

    # Reset / create paths if not exist
    if exists(project_numpy_path) and reset:
        ut.remove_dirs(project_numpy_path)
    ut.ensuredir(project_numpy_path)

    ids = []
    X = []
    y = []

    # Process by loading images into the numpy array for saving
    for file_path in direct.files():
        file_name = basename(file_path)
        print('Processing %r' % (file_name,))
        # Resolve the label first so unlabeled images are never decoded.
        try:
            label = label_dict[file_name]
        except KeyError:
            print('Cannot find label...skipping')
            continue
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread returns None on failure; a None entry would make
            # the uint8 conversion below fail after all the work was done.
            print('Cannot read image...skipping')
            continue
        ids.append(file_name)
        X.append(image)  # kept exactly as returned by cv2.imread
        y.append(label)

    ids = np.array(ids)
    X = np.array(X, dtype=np.uint8)
    y = np.array(y)

    # Save numpy array
    print(' ids.shape = %r' % (ids.shape,))
    print(' ids.dtype = %r' % (ids.dtype,))
    print(' X.shape = %r' % (X.shape,))
    print(' X.dtype = %r' % (X.dtype,))
    print(' y.shape = %r' % (y.shape,))
    print(' y.dtype = %r' % (y.dtype,))
    np.save(project_numpy_ids_file_name, ids)
    np.save(project_numpy_x_file_name, X)
    np.save(project_numpy_y_file_name, y)
def numpy_processed_directory2(
    extracted_path,
    numpy_ids_file_name='ids.npy',
    numpy_x_file_name='X.npy',
    numpy_y_file_name='y.npy',
    labels_file_name='labels.csv',
    reset=True,
    verbose=False,
):
    """Cache the images of an extracted dataset and their labels as numpy files.

    Images are read from the ``raw`` sub-folder of ``extracted_path`` and
    labels from the CSV file in its ``labels`` sub-folder.  The ids and X
    arrays are saved into ``raw``; the y array is saved into ``labels``.

    Each non-blank line of the labels file is read as
    ``file_name,label``; only the first two comma-separated fields are used.
    Images without a label entry, and images that cannot be decoded, are
    skipped with a message.

    Args:
        extracted_path (str): root folder holding ``raw`` and ``labels``.
        numpy_ids_file_name (str): output file name for the image ids.
        numpy_x_file_name (str): output file name for the image data.
        numpy_y_file_name (str): output file name for the labels.
        labels_file_name (str): name of the labels CSV file.
        reset (bool): not used by this function; kept so the signature
            stays compatible with existing callers.
        verbose (bool): when True, print each file name as it is processed.

    Returns:
        tuple: paths of the saved ids, X and y files, in that order.
    """
    import cv2

    print('Caching images into Numpy files...')

    raw_path = join(extracted_path, 'raw')
    labels_path = join(extracted_path, 'labels')

    # Project files
    project_numpy_ids_file_name = join(raw_path, numpy_ids_file_name)
    project_numpy_x_file_name = join(raw_path, numpy_x_file_name)
    project_numpy_y_file_name = join(labels_path, numpy_y_file_name)
    project_numpy_labels_file_name = join(labels_path, labels_file_name)

    # Load raw data
    direct = Directory(raw_path, include_extensions='images')
    label_dict = {}
    # Context manager so the labels file handle is always closed.
    with open(project_numpy_labels_file_name) as labels_file:
        for line in labels_file:
            line = line.strip()
            if not line:
                # Blank lines carry no label; indexing them would fail.
                continue
            fields = line.split(',')
            label_dict[fields[0].strip()] = fields[1].strip()

    ids = []
    X = []
    y = []

    # Process by loading images into the numpy array for saving
    for file_path in direct.files():
        file_name = basename(file_path)
        if verbose:
            print('Processing %r' % (file_name,))
        # Resolve the label first so unlabeled images are never decoded.
        try:
            label = label_dict[file_name]
        except KeyError:
            print('Cannot find label...skipping')
            continue
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread returns None on failure; a None entry would make
            # the uint8 conversion below fail after all the work was done.
            print('Cannot read image...skipping')
            continue
        ids.append(file_name)
        X.append(image)  # kept exactly as returned by cv2.imread
        y.append(label)

    ids = np.array(ids)
    X = np.array(X, dtype=np.uint8)
    y = np.array(y)

    # Save numpy array
    print(' ids.shape = %r' % (ids.shape,))
    print(' ids.dtype = %r' % (ids.dtype,))
    print(' X.shape = %r' % (X.shape,))
    print(' X.dtype = %r' % (X.dtype,))
    print(' y.shape = %r' % (y.shape,))
    print(' y.dtype = %r' % (y.dtype,))
    np.save(project_numpy_ids_file_name, ids)
    np.save(project_numpy_x_file_name, X)
    np.save(project_numpy_y_file_name, y)

    return (
        project_numpy_ids_file_name,
        project_numpy_x_file_name,
        project_numpy_y_file_name,
    )
def numpy_processed_directory3(
    extracted_path,
    numpy_ids_file_name='ids.npy',
    numpy_x_file_name='X.npy',
    numpy_y_file_name='y.npy',
    labels_file_name='labels.csv',
    categories_file_name='categories.csv',
    reset=True,
    verbose=False,
):
    """Cache images and multi-category indicator labels as numpy files.

    Images are read from the ``raw`` sub-folder of ``extracted_path``; the
    labels and categories files are read from its ``labels`` sub-folder.
    The ids and X arrays are saved into ``raw``; the y array is saved into
    ``labels``.

    The categories file lists one category per non-empty line.  Each
    non-blank line of the labels file is read as ``file_name,label`` where
    ``label`` is a ``;``-separated list of categories.  It is turned into a
    0/1 vector with one entry per category, in categories-file order.
    Images without a label entry, and images that cannot be decoded, are
    skipped with a message.

    Args:
        extracted_path (str): root folder holding ``raw`` and ``labels``.
        numpy_ids_file_name (str): output file name for the image ids.
        numpy_x_file_name (str): output file name for the image data.
        numpy_y_file_name (str): output file name for the label vectors.
        labels_file_name (str): name of the labels CSV file.
        categories_file_name (str): name of the categories file.
        reset (bool): not used by this function; kept so the signature
            stays compatible with existing callers.
        verbose (bool): when True, print each file name as it is processed.

    Returns:
        tuple: paths of the saved ids, X and y files, in that order.

    Raises:
        AssertionError: if a label line names none of the known categories.
    """
    import cv2

    print('Caching images into Numpy files with category vector...')

    raw_path = join(extracted_path, 'raw')
    labels_path = join(extracted_path, 'labels')

    # Project files
    project_numpy_ids_file_name = join(raw_path, numpy_ids_file_name)
    project_numpy_x_file_name = join(raw_path, numpy_x_file_name)
    project_numpy_y_file_name = join(labels_path, numpy_y_file_name)
    project_numpy_labels_file_name = join(labels_path, labels_file_name)
    project_numpy_categories_file_name = join(labels_path, categories_file_name)

    # Context manager so the categories file handle is always closed.
    with open(project_numpy_categories_file_name) as categories_file:
        stripped_lines = (line.strip() for line in categories_file)
        category_list = [category for category in stripped_lines if category]

    # Load raw data
    direct = Directory(raw_path, include_extensions='images')
    label_dict = {}
    # Maps "number of active categories" -> how many label lines have it.
    count_dict = {}
    with open(project_numpy_labels_file_name) as labels_file:
        for line in labels_file:
            line = line.strip()
            if not line:
                # Blank lines carry no label; indexing them would fail.
                continue
            fields = line.split(',')
            file_name = fields[0].strip()
            label_set = set(fields[1].strip().split(';'))
            label = [
                1 if category_ in label_set else 0 for category_ in category_list
            ]
            assert 1 in label
            count = label.count(1)
            count_dict[count] = count_dict.get(count, 0) + 1
            label_dict[file_name] = label
    print('count_dict = %s' % (ut.repr3(count_dict),))

    ids = []
    X = []
    y = []

    # Process by loading images into the numpy array for saving
    for file_path in direct.files():
        file_name = basename(file_path)
        if verbose:
            print('Processing %r' % (file_name,))
        # Resolve the label first so unlabeled images are never decoded.
        try:
            label = np.array(label_dict[file_name])
        except KeyError:
            print('Cannot find label...skipping')
            continue
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread returns None on failure; a None entry would make
            # the uint8 conversion below fail after all the work was done.
            print('Cannot read image...skipping')
            continue
        ids.append(file_name)
        X.append(image)  # kept exactly as returned by cv2.imread
        y.append(label)

    ids = np.array(ids)
    X = np.array(X, dtype=np.uint8)
    # One row per kept image, one column per category.
    y = np.vstack(y)

    # Save numpy array
    print(' ids.shape = %r' % (ids.shape,))
    print(' ids.dtype = %r' % (ids.dtype,))
    print(' X.shape = %r' % (X.shape,))
    print(' X.dtype = %r' % (X.dtype,))
    print(' y.shape = %r' % (y.shape,))
    print(' y.dtype = %r' % (y.dtype,))
    print(' categories = %r' % (category_list,))
    np.save(project_numpy_ids_file_name, ids)
    np.save(project_numpy_x_file_name, X)
    np.save(project_numpy_y_file_name, y)

    return (
        project_numpy_ids_file_name,
        project_numpy_x_file_name,
        project_numpy_y_file_name,
    )
def numpy_processed_directory4(
    extracted_path,
    numpy_ids_file_name='ids.npy',
    numpy_x_file_name='X.npy',
    numpy_y_file_name='y.npy',
    labels_file_name='labels.csv',
    reset=True,
    verbose=False,
):
    """Cache per-item ``.npy`` data files and numeric labels as numpy files.

    Data files with the ``npy`` extension are read from the ``raw``
    sub-folder of ``extracted_path`` and labels from the CSV file in its
    ``labels`` sub-folder.  The ids and X arrays are saved into ``raw``;
    the y array is saved into ``labels``.

    Each non-blank line of the labels file is read as ``file_name,label``
    where ``label`` is a ``;``-separated list of rows, each row being a
    ``^``-separated list of floats.  Files without a label entry are
    skipped with a message.

    Args:
        extracted_path (str): root folder holding ``raw`` and ``labels``.
        numpy_ids_file_name (str): output file name for the item ids.
        numpy_x_file_name (str): output file name for the stacked data.
        numpy_y_file_name (str): output file name for the labels.
        labels_file_name (str): name of the labels CSV file.
        reset (bool): not used by this function; kept so the signature
            stays compatible with existing callers.
        verbose (bool): when True, print each file name as it is processed.

    Returns:
        tuple: paths of the saved ids, X and y files, in that order.
    """
    print('Caching images into Numpy files with category vector...')

    raw_path = join(extracted_path, 'raw')
    labels_path = join(extracted_path, 'labels')

    # Project files
    project_numpy_ids_file_name = join(raw_path, numpy_ids_file_name)
    project_numpy_x_file_name = join(raw_path, numpy_x_file_name)
    project_numpy_y_file_name = join(labels_path, numpy_y_file_name)
    project_numpy_labels_file_name = join(labels_path, labels_file_name)

    # Load raw data
    direct = Directory(raw_path, include_extensions=['npy'])
    label_dict = {}
    # Context manager so the labels file handle is always closed.
    with open(project_numpy_labels_file_name) as labels_file:
        for line in labels_file:
            line = line.strip()
            if not line:
                # Blank lines carry no label; indexing them would fail.
                continue
            fields = line.split(',')
            file_name = fields[0].strip()
            rows = fields[1].strip().split(';')
            label_dict[file_name] = np.array(
                [list(map(float, row.split('^'))) for row in rows]
            )

    # Create numpy arrays
    ids = []
    X = []
    y = []

    # Process by loading the data files into the numpy array for saving
    for file_path in direct.files():
        file_name = basename(file_path)
        if verbose:
            print('Processing %r' % (file_name,))
        # Resolve the label before loading: the ids / X outputs are written
        # into this same folder, so earlier output files may be listed here
        # too and should not be loaded just to be discarded.
        try:
            label = label_dict[file_name]
        except KeyError:
            print('Cannot find label...skipping')
            continue
        # .npy is a binary format, so the file must be opened in 'rb' mode;
        # text mode makes np.load fail on Python 3.
        with open(file_path, 'rb') as file_:
            data = np.load(file_)
        ids.append(file_name)
        X.append(data)
        y.append(label)

    ids = np.array(ids)
    X = np.array(X, dtype=np.float32)
    y = np.array(y)

    # Save numpy array
    print(' ids.shape = %r' % (ids.shape,))
    print(' ids.dtype = %r' % (ids.dtype,))
    print(' X.shape = %r' % (X.shape,))
    print(' X.dtype = %r' % (X.dtype,))
    print(' y.shape = %r' % (y.shape,))
    print(' y.dtype = %r' % (y.dtype,))
    np.save(project_numpy_ids_file_name, ids)
    np.save(project_numpy_x_file_name, X)
    np.save(project_numpy_y_file_name, y)

    return (
        project_numpy_ids_file_name,
        project_numpy_x_file_name,
        project_numpy_y_file_name,
    )
def numpy_processed_directory5(
    extracted_path,
    numpy_ids_file_name='ids.npy',
    numpy_x_file_name='X.npy',
    numpy_y_file_name='y.npy',
    labels_file_name='labels.csv',
    reset=True,
    verbose=False,
):
    """Cache unchanged-format images and numeric labels as numpy files.

    Images are read from the ``raw`` sub-folder of ``extracted_path`` with
    ``cv2.imread(path, -1)`` and labels from the CSV file in its ``labels``
    sub-folder.  The ids and X arrays are saved into ``raw``; the y array
    is saved into ``labels``.

    Each non-blank line of the labels file is read as ``file_name,label``
    where ``label`` is a ``;``-separated list of rows, each row being a
    ``^``-separated list of floats.  Images without a label entry, and
    images that cannot be decoded, are skipped with a message.

    Args:
        extracted_path (str): root folder holding ``raw`` and ``labels``.
        numpy_ids_file_name (str): output file name for the image ids.
        numpy_x_file_name (str): output file name for the image data.
        numpy_y_file_name (str): output file name for the labels.
        labels_file_name (str): name of the labels CSV file.
        reset (bool): not used by this function; kept so the signature
            stays compatible with existing callers.
        verbose (bool): when True, print each file name as it is processed.

    Returns:
        tuple: paths of the saved ids, X and y files, in that order.
    """
    import cv2

    print('Caching images into Numpy files with category vector...')

    raw_path = join(extracted_path, 'raw')
    labels_path = join(extracted_path, 'labels')

    # Project files
    project_numpy_ids_file_name = join(raw_path, numpy_ids_file_name)
    project_numpy_x_file_name = join(raw_path, numpy_x_file_name)
    project_numpy_y_file_name = join(labels_path, numpy_y_file_name)
    project_numpy_labels_file_name = join(labels_path, labels_file_name)

    # Load raw data
    direct = Directory(raw_path, include_extensions='images')
    label_dict = {}
    # Context manager so the labels file handle is always closed.
    with open(project_numpy_labels_file_name) as labels_file:
        for line in labels_file:
            line = line.strip()
            if not line:
                # Blank lines carry no label; indexing them would fail.
                continue
            fields = line.split(',')
            file_name = fields[0].strip()
            rows = fields[1].strip().split(';')
            label_dict[file_name] = np.array(
                [list(map(float, row.split('^'))) for row in rows]
            )

    # Create numpy arrays
    ids = []
    X = []
    y = []

    # Process by loading images into the numpy array for saving
    for file_path in direct.files():
        file_name = basename(file_path)
        if verbose:
            print('Processing %r' % (file_name,))
        # Resolve the label first so unlabeled images are never decoded.
        try:
            label = label_dict[file_name]
        except KeyError:
            print('Cannot find label...skipping')
            continue
        image = cv2.imread(file_path, -1)
        if image is None:
            # cv2.imread returns None on failure; a None entry would make
            # the uint8 conversion below fail after all the work was done.
            print('Cannot read image...skipping')
            continue
        ids.append(file_name)
        X.append(image)
        y.append(label)

    ids = np.array(ids)
    X = np.array(X, dtype=np.uint8)
    y = np.array(y)

    # Save numpy array
    print(' ids.shape = %r' % (ids.shape,))
    print(' ids.dtype = %r' % (ids.dtype,))
    print(' X.shape = %r' % (X.shape,))
    print(' X.dtype = %r' % (X.dtype,))
    print(' y.shape = %r' % (y.shape,))
    print(' y.dtype = %r' % (y.dtype,))
    np.save(project_numpy_ids_file_name, ids)
    np.save(project_numpy_x_file_name, X)
    np.save(project_numpy_y_file_name, y)

    return (
        project_numpy_ids_file_name,
        project_numpy_x_file_name,
        project_numpy_y_file_name,
    )
def view_numpy_data(project_namel, numpy_x_file_name='X.npy', numpy_y_file_name='y.npy'):
    """Load a project's cached X / y numpy files and print their shape and dtype.

    The files are looked up in ``../data/numpy/`` + the project name,
    resolved against the current working directory.

    Args:
        project_namel (str): name of the project sub-folder.  The spelling
            of this parameter is kept as-is so keyword callers keep working.
        numpy_x_file_name (str): file name of the cached image data.
        numpy_y_file_name (str): file name of the cached labels.
    """
    # The body must use the argument itself; reading ``project_name`` here
    # would silently depend on a module-level global of that name and raise
    # NameError whenever none exists.
    project_name = project_namel

    # Raw folders
    numpy_path = abspath(join('..', 'data', 'numpy'))

    # Project folders
    project_numpy_path = join(numpy_path, project_name)

    # Project files
    project_numpy_x_file_name = join(project_numpy_path, numpy_x_file_name)
    project_numpy_y_file_name = join(project_numpy_path, numpy_y_file_name)

    X = np.load(project_numpy_x_file_name)
    y = np.load(project_numpy_y_file_name)

    print(' X.shape = %r' % (X.shape,))
    print(' X.dtype = %r' % (X.dtype,))
    print(' y.shape = %r' % (y.shape,))
    print(' y.dtype = %r' % (y.dtype,))
if __name__ == '__main__':
    # Script entry point: cache one project's processed images into numpy
    # files, then print the shape / dtype of the cached X and y arrays.
    #
    # Project configurations used on earlier runs, kept for reference:

    # project_name = 'viewpoint_large'
    # # size = (96, 96)
    # # process_image_directory(project_name, size)
    # numpy_processed_directory(project_name)

    # project_name = 'viewpoint_pz'
    # # size = (64, 64)
    # # process_image_directory(project_name, size)
    # numpy_processed_directory(project_name)

    # project_name = 'quality_pz'
    # # size = (64, 64)
    # # process_image_directory(project_name, size)
    # numpy_processed_directory(project_name)

    # Currently active project.
    project_name = 'background_patches'
    # The resize step is disabled here; uncomment to regenerate the
    # processed images first.
    # size = (64, 64)
    # process_image_directory(project_name, size)
    numpy_processed_directory(project_name)
    view_numpy_data(project_name)