# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function
import utool as ut
import numpy as np
import vtool as vt
import six
import itertools
from six.moves import zip, map, range
from functools import partial
from wbia_cnn import draw_results # NOQA
print, rrr, profile = ut.inject2(__name__)
FIX_HASH = True
def get_aidpairs_partmatch(ibs, acfg_name):
"""
CommandLine:
python -m wbia_cnn.ingest_wbia --exec-get_aidpairs_partmatch
SeeAlso:
extract_annotpair_training_chips
Example:
>>> # DISABLE_DOCTEST
>>> from wbia_cnn.ingest_wbia import * # NOQA
>>> import wbia
>>> # build test data
>>> ibs = wbia.opendb(defaultdb='PZ_Master1')
>>> #ibs = wbia.opendb(defaultdb='PZ_MTEST')
>>> #ibs = wbia.opendb(defaultdb='PZ_FlankHack')
>>> acfg_name = ut.get_argval(('--aidcfg', '--acfg', '-a'),
... type_=str,
... default='ctrl:pername=None,excluderef=False,contributor_contains=FlankHack')
>>> aid_pairs, label_list, flat_metadata = get_aidpairs_partmatch(ibs, acfg_name)
"""
print('NEW WAY OF FILTERING')
from wbia.expt import experiment_helpers
acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(ibs, [acfg_name])
# acfg = acfg_list[0]
expanded_aids = expanded_aids_list[0]
qaid_list, daid_list = expanded_aids
available_aids = np.unique(ut.flatten([qaid_list, daid_list]))
tup = ibs.partition_annots_into_corresponding_groups(qaid_list, daid_list)
aids1_list, aids2_list, other_aids1, other_aids2 = tup
multiton_aids = np.unique(ut.flatten(aids1_list + aids2_list))
# singletons = ut.flatten(other_aids2 + other_aids1)
ibs.print_annotconfig_stats(qaid_list, daid_list, bigstr=True)
# Positive Examples
print('Sampling positive examples')
nested_pairs = list(
map(list, itertools.starmap(ut.iprod, zip(aids1_list, aids2_list)))
)
pos_aid_pairs = np.vstack(nested_pairs)
# Filter Self
flag_list = pos_aid_pairs.T[0] != pos_aid_pairs.T[1]
pos_aid_pairs = pos_aid_pairs.compress(flag_list, axis=0)
# Filter bad viewpoints
TAU = np.pi * 2
    _np_get_annot_yaws = ut.accepts_numpy(six.get_method_function(ibs.get_annot_yaws_asfloat))
yaw_pairs = _np_get_annot_yaws(ibs, pos_aid_pairs)
yawdist = vt.ori_distance(yaw_pairs.T[0], yaw_pairs.T[1])
flag_list = np.logical_or(np.isnan(yawdist), yawdist < TAU / 8.0)
pos_aid_pairs = pos_aid_pairs.compress(flag_list, axis=0)
    # pos_aid_pairs = vt.unique_rows(pos_aid_pairs)  # should be unnecessary
assert len(vt.unique_rows(pos_aid_pairs)) == len(pos_aid_pairs)
print('pos_aid_pairs.shape = %r' % (pos_aid_pairs.shape,))
# Hard Negative Examples
print('Sampling hard negative examples')
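    # Hard negatives come from an actual identification query: for each query
    # annotation we take its top-ranked ground-false (different-name) results,
    # i.e. exactly the pairs the current matcher confuses.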
num_hard_neg_per_aid = max(1, len(pos_aid_pairs) // len(multiton_aids))
cfgdict = {
'affine_invariance': False,
'fg_on': not ut.WIN32,
}
qreq_ = ibs.new_query_request(qaid_list, daid_list, cfgdict=cfgdict)
cm_list = qreq_.execute()
hardneg_aids1 = [[cm.qaid] for cm in (cm_list)]
hardneg_aids2 = [cm.get_top_gf_aids(ibs, ntop=num_hard_neg_per_aid) for cm in cm_list]
hardneg_aid_pairs = np.array(
ut.flatten(itertools.starmap(ut.iprod, zip(hardneg_aids1, hardneg_aids2)))
)
# Random Negative Examples
# TODO: may be able to say not a match from viewpoint?
print('Sampling random negative examples')
num_rand_neg_per_aid = max(1, len(pos_aid_pairs) // len(multiton_aids))
rng = np.random.RandomState(0)
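    # For each annotation, sample negatives uniformly from annotations with a
    # different name (nid); the probability vector p zeroes out same-name
    # entries so rng.choice never draws a positive pair here.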
randneg_aid_pairs = []
neg_aid_pool = available_aids
neg_nid_pool = np.array(ibs.get_annot_nids(neg_aid_pool))
for aid, nid in zip(neg_aid_pool, neg_nid_pool):
is_valid = np.not_equal(neg_nid_pool, nid)
p = is_valid / (is_valid.sum())
chosen = rng.choice(neg_aid_pool, size=num_rand_neg_per_aid, replace=False, p=p)
chosen_pairs = list(ut.iprod([aid], chosen))
randneg_aid_pairs.extend(chosen_pairs)
randneg_aid_pairs = np.array(randneg_aid_pairs)
# Concatenate both types of negative examples
# print('Building negative examples')
# _neg_aid_pairs = np.vstack((hardneg_aid_pairs, randneg_aid_pairs))
# neg_aid_pairs = vt.unique_rows(_neg_aid_pairs)
# Unsure Examples
    # TODO: use quality and viewpoint labelings to determine this
# as well as metadata from the annotmatch table
print('hardneg_aid_pairs.shape = %r' % (hardneg_aid_pairs.shape,))
print('randneg_aid_pairs.shape = %r' % (randneg_aid_pairs.shape,))
print('pos_aid_pairs.shape = %r' % (pos_aid_pairs.shape,))
print('Building labels')
const = ibs.const
unflat_pairs = (pos_aid_pairs, hardneg_aid_pairs, randneg_aid_pairs)
type_labels = (const.REVIEW.MATCH, const.REVIEW.NON_MATCH, const.REVIEW.NON_MATCH)
type_meta_labels = ('pos', 'hardneg', 'randneg')
def _expand(type_list):
return [[item] * len(pairs) for pairs, item in zip(unflat_pairs, type_list)]
_aid_pairs = np.vstack(unflat_pairs)
_labels = np.hstack(_expand(type_labels))
flat_metadata = {'meta_label': ut.flatten(_expand(type_meta_labels))}
print('Filtering Duplicates')
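    # A pair can be produced by more than one sampler (e.g. a hard negative
    # that was also drawn randomly); each (aid1, aid2) row is kept only once,
    # and the histograms below report which meta_label the dropped duplicates
    # came from.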
nonunique_flags = vt.nonunique_row_flags(_aid_pairs)
print('Filtered %d duplicate pairs' % (nonunique_flags.sum()))
print('Nonunique stats:')
for key, val in flat_metadata.items():
print(ut.dict_hist(ut.compress(val, nonunique_flags)))
unique_flags = ~nonunique_flags
# Do filtering
aid_pairs = _aid_pairs.compress(unique_flags, axis=0)
label_list = _labels.compress(unique_flags, axis=0)
for key, val in flat_metadata.items():
flat_metadata[key] = ut.compress(val, unique_flags)
print('Final Stats')
for key, val in flat_metadata.items():
print(ut.dict_hist(val))
groupsizes = map(len, vt.group_indices(aid_pairs.T[0])[1])
ut.print_dict(ut.dict_hist(groupsizes), 'groupsize freq')
flat_metadata['aid_pairs'] = aid_pairs
return aid_pairs, label_list, flat_metadata
def get_aidpair_patchmatch_training_data(
ibs,
aid1_list,
aid2_list,
kpts1_m_list,
kpts2_m_list,
fm_list,
metadata_lists,
patch_size,
colorspace,
):
"""
FIXME: errors on get_aidpairs_and_matches(ibs, 1)
CommandLine:
python -m wbia_cnn.ingest_wbia --test-get_aidpair_patchmatch_training_data --show
Example:
>>> # DISABLE_DOCTEST
>>> from wbia_cnn.ingest_wbia import * # NOQA
>>> import wbia
>>> # build test data
>>> ibs = wbia.opendb(defaultdb='PZ_MTEST')
>>> tup = get_aidpairs_and_matches(ibs, 6)
>>> (aid1_list, aid2_list, kpts1_m_list, kpts2_m_list, fm_list, metadata_lists) = tup
>>> pmcfg = PatchMetricDataConfig()
>>> patch_size = pmcfg['patch_size']
>>> colorspace = pmcfg['colorspace']
>>> # execute function
>>> tup = get_aidpair_patchmatch_training_data(ibs, aid1_list,
... aid2_list, kpts1_m_list, kpts2_m_list, fm_list, metadata_lists,
... patch_size, colorspace)
>>> aid1_list_, aid2_list_, warped_patch1_list, warped_patch2_list, flat_metadata = tup
>>> ut.quit_if_noshow()
>>> label_list = get_aidpair_training_labels(ibs, aid1_list_, aid2_list_)
>>> draw_results.interact_patches(label_list, (warped_patch1_list, warped_patch2_list), flat_metadata)
>>> ut.show_if_requested()
"""
# Flatten to only apply chip operations once
print('get_aidpair_patchmatch_training_data num_pairs = %r' % (len(aid1_list)))
assert len(aid1_list) == len(aid2_list)
assert len(aid1_list) == len(kpts1_m_list)
assert len(aid1_list) == len(kpts2_m_list)
assert len(aid1_list) == len(fm_list)
    print('getting unflat chips')
flat_unique, reconstruct_tup = ut.inverable_unique_two_lists(aid1_list, aid2_list)
print('grabbing %d unique chips' % (len(flat_unique)))
chip_list = ibs.get_annot_chips(flat_unique) # TODO config2_
    # convert to appropriate colorspace
chip_list = vt.convert_image_list_colorspace(chip_list, colorspace)
ut.print_object_size(chip_list, 'chip_list')
chip1_list, chip2_list = ut.uninvert_unique_two_lists(chip_list, reconstruct_tup)
print('warping')
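    # PatchExtractCache memoizes warped patches by (aid, feature-index), so a
    # keypoint that participates in several pairs is only warped once.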
class PatchExtractCache(object):
def __init__(self, patch_size):
self.patch_size = patch_size
self.cache_ = {}
def idcache_find_misses(self, id_list, cache_):
# Generalize?
val_list = [cache_.get(id_, None) for id_ in id_list]
ismiss_list = [val is None for val in val_list]
return val_list, ismiss_list
def idcache_save(self, ismiss_list, miss_vals, id_list, val_list, cache_):
# Generalize?
miss_indices = ut.list_where(ismiss_list)
miss_ids = ut.compress(id_list, ismiss_list)
# overwrite missed output
for index, val in zip(miss_indices, miss_vals):
val_list[index] = val
# cache save
for id_, val in zip(miss_ids, miss_vals):
cache_[id_] = val
        def cacheget_warped_patches(self, aid, fxs, chip, kpts):
# +-- Custom ids
id_list = [(aid, fx) for fx in fxs]
# L__
val_list, ismiss_list = self.idcache_find_misses(id_list, self.cache_)
if any(ismiss_list):
# +-- Custom evaluate misses
kpts_miss = kpts.compress(ismiss_list, axis=0)
miss_vals = vt.get_warped_patches(
chip, kpts_miss, patch_size=self.patch_size
)[0]
# L__
self.idcache_save(ismiss_list, miss_vals, id_list, val_list, self.cache_)
return val_list
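    # fm is an array of (fx1, fx2) feature-index pairs; fm.T[0] indexes into
    # annot1's keypoints and fm.T[1] into annot2's.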
fx1_list = [fm.T[0] for fm in fm_list]
fx2_list = [fm.T[1] for fm in fm_list]
warp_iter1 = ut.ProgIter(
zip(aid1_list, fx1_list, chip1_list, kpts1_m_list),
nTotal=len(kpts1_m_list),
lbl='warp1',
adjust=True,
)
warp_iter2 = ut.ProgIter(
zip(aid2_list, fx2_list, chip2_list, kpts2_m_list),
nTotal=len(kpts2_m_list),
lbl='warp2',
adjust=True,
)
pec = PatchExtractCache(patch_size=patch_size)
warped_patches1_list = list(
        itertools.starmap(pec.cacheget_warped_patches, warp_iter1)
)
warped_patches2_list = list(
        itertools.starmap(pec.cacheget_warped_patches, warp_iter2)
)
# warp_iter1 = ut.ProgIter(zip(chip1_list, kpts1_m_list),
# nTotal=len(kpts1_m_list), lbl='warp1',
# adjust=True)
# warp_iter2 = ut.ProgIter(zip(chip2_list, kpts2_m_list),
# nTotal=len(kpts2_m_list), lbl='warp2',
# adjust=True)
# warped_patches1_list = [vt.get_warped_patches(chip1, kpts1, patch_size=patch_size)[0]
# for chip1, kpts1 in warp_iter1]
# warped_patches2_list = [vt.get_warped_patches(chip2, kpts2, patch_size=patch_size)[0]
# for chip2, kpts2 in warp_iter2]
ut.print_object_size(warped_patches1_list, 'warped_patches1_list')
ut.print_object_size(warped_patches2_list, 'warped_patches2_list')
len1_list = list(map(len, fm_list))
assert ut.lmap(len, warped_patches1_list) == ut.lmap(len, warped_patches2_list), 'bug'
assert ut.lmap(len, warped_patches1_list) == len1_list, 'bug'
print('flattening')
aid1_list_ = np.array(
ut.flatten([[aid1] * len1 for len1, aid1 in zip(len1_list, aid1_list)])
)
aid2_list_ = np.array(
ut.flatten([[aid2] * len1 for len1, aid2 in zip(len1_list, aid2_list)])
)
# Flatten metadata
flat_metadata = {
key: np.array(ut.flatten(val)) for key, val in metadata_lists.items()
}
flat_metadata['aid_pairs'] = np.hstack(
(np.array(aid1_list_)[:, None], np.array(aid2_list_)[:, None])
)
flat_metadata['fm'] = np.vstack(fm_list)
flat_metadata['kpts1_m'] = np.vstack(kpts1_m_list)
flat_metadata['kpts2_m'] = np.vstack(kpts2_m_list)
# flat_metadata = ut.map_dict_vals(np.array, flat_metadata)
warped_patch1_list = ut.flatten(warped_patches1_list)
warped_patch2_list = ut.flatten(warped_patches2_list)
# del warped_patches1_list
# del warped_patches2_list
return aid1_list_, aid2_list_, warped_patch1_list, warped_patch2_list, flat_metadata
def flatten_patch_data(
ibs, aid1_list, aid2_list, kpts1_m_list, kpts2_m_list, fm_list, metadata_lists
):
# TODO; rectify
len1_list = list(map(len, fm_list))
print('flattening')
aid1_list_ = np.array(
ut.flatten([[aid1] * len1 for len1, aid1 in zip(len1_list, aid1_list)])
)
aid2_list_ = np.array(
ut.flatten([[aid2] * len1 for len1, aid2 in zip(len1_list, aid2_list)])
)
# Flatten metadata
flat_metadata = {
key: np.array(ut.flatten(val)) for key, val in metadata_lists.items()
}
flat_metadata['aid_pairs'] = np.hstack(
(np.array(aid1_list_)[:, None], np.array(aid2_list_)[:, None])
)
flat_metadata['fm'] = np.vstack(fm_list)
flat_metadata['kpts1_m'] = np.vstack(kpts1_m_list)
flat_metadata['kpts2_m'] = np.vstack(kpts2_m_list)
labels = get_aidpair_training_labels(ibs, aid1_list_, aid2_list_)
return aid1_list_, aid2_list_, labels, flat_metadata
# TEMP
def get_patchmetric_training_data_and_labels(
ibs,
aid1_list,
aid2_list,
kpts1_m_list,
kpts2_m_list,
fm_list,
metadata_lists,
patch_size,
colorspace,
):
"""
Notes:
# FIXME: THERE ARE INCORRECT CORRESPONDENCES LABELED AS CORRECT THAT
        # NEED MANUAL CORRECTION EITHER THROUGH EXPLICIT LABELING OR
# SEGMENTATION MASKS
CommandLine:
python -m wbia_cnn.ingest_wbia --test-get_patchmetric_training_data_and_labels --show
Example:
>>> # DISABLE_DOCTEST
>>> from wbia_cnn.ingest_wbia import * # NOQA
>>> import wbia
>>> # build test data
>>> ibs = wbia.opendb(defaultdb='PZ_MTEST')
>>> (aid1_list, aid2_list, kpts1_m_list, kpts2_m_list, fm_list, metadata_lists) = get_aidpairs_and_matches(ibs, 10, 3)
>>> pmcfg = PatchMetricDataConfig()
>>> patch_size = pmcfg['patch_size']
>>> colorspace = pmcfg['colorspace']
>>> data, labels, flat_metadata = get_patchmetric_training_data_and_labels(ibs,
... aid1_list, aid2_list, kpts1_m_list, kpts2_m_list, fm_list,
... metadata_lists, patch_size, colorspace)
>>> ut.quit_if_noshow()
>>> draw_results.interact_siamsese_data_patches(labels, data)
>>> ut.show_if_requested()
"""
    # Do the removal of unknown pairs before computing the data
tup = get_aidpair_patchmatch_training_data(
ibs,
aid1_list,
aid2_list,
kpts1_m_list,
kpts2_m_list,
fm_list,
metadata_lists,
patch_size,
colorspace,
)
(aid1_list_, aid2_list_, warped_patch1_list, warped_patch2_list, flat_metadata) = tup
labels = get_aidpair_training_labels(ibs, aid1_list_, aid2_list_)
img_list = ut.flatten(list(zip(warped_patch1_list, warped_patch2_list)))
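    # The flattened zip interleaves the pair: rows alternate patch1, patch2,
    # so data has two rows per label (data_per_label = 2).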
data = np.array(img_list)
del img_list
# data_per_label = 2
assert labels.shape[0] == data.shape[0] // 2
from wbia import const
assert np.all(labels != const.REVIEW.UNKNOWN)
return data, labels, flat_metadata
def mark_inconsistent_viewpoints(ibs, aid1_list, aid2_list):
    yaw1_list = np.array(ut.replace_nones(ibs.get_annot_yaws(aid1_list), np.nan))
    yaw2_list = np.array(ut.replace_nones(ibs.get_annot_yaws(aid2_list), np.nan))
yawdist_list = vt.ori_distance(yaw1_list, yaw2_list)
TAU = np.pi * 2
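    # A yaw difference greater than TAU / 8 (45 degrees) is treated as an
    # inconsistent viewpoint.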
isinconsistent_list = yawdist_list > TAU / 8
return isinconsistent_list
def get_aidpair_training_labels(ibs, aid1_list_, aid2_list_):
"""
Returns:
ndarray: true in positions of matching, and false in positions of not matching
Example:
>>> # DISABLE_DOCTEST
>>> from wbia_cnn.ingest_wbia import * # NOQA
>>> import wbia
>>> # build test data
>>> ibs = wbia.opendb(defaultdb='PZ_MTEST')
>>> tup = get_aidpairs_and_matches(ibs)
>>> (aid1_list, aid2_list) = tup[0:2]
>>> aid1_list = aid1_list[0:min(100, len(aid1_list))]
>>> aid2_list = aid2_list[0:min(100, len(aid2_list))]
>>> # execute function
>>> labels = get_aidpair_training_labels(ibs, aid1_list, aid2_list)
>>> result = ('labels = %s' % (ut.numpy_str(labels, threshold=10),))
>>> print(result)
labels = np.array([1, 0, 0, ..., 0, 1, 0], dtype=np.int32)
"""
truth_list = ibs.get_aidpair_truths(aid1_list_, aid2_list_)
labels = truth_list
# Mark different viewpoints as unknown for training
isinconsistent_list = mark_inconsistent_viewpoints(ibs, aid1_list_, aid2_list_)
labels[isinconsistent_list] = ibs.const.REVIEW.UNKNOWN
return labels
# Estimate how big the patches will be
def estimate_data_bytes(num_data, item_shape):
data_per_label = 2
dtype_bytes = 1
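    # Assumes uint8 data (1 byte per element) and two images per labeled pair.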
estimated_bytes = np.prod(item_shape) * num_data * data_per_label * dtype_bytes
print('Estimated data size: ' + ut.byte_str2(estimated_bytes))
# class NewConfigBase(object):
# def __init__(self, **kwargs):
# self.update(**kwargs)
# def update(self, **kwargs):
# self.__dict__.update(**kwargs)
# ut.update_existing(self.__dict__, kwargs)
# unhandled_keys = set(kwargs.keys()) - set(self.__dict__.keys())
# if len(unhandled_keys) > 0:
# raise AssertionError(
# '[ConfigBaseError] unhandled_keys=%r' % (unhandled_keys,))
# def kw(self):
# return ut.KwargsWrapper(self)
try:
from wbia import dtool
    class PartMatchDataConfig(dtool.Config):
_param_info_list = [
ut.ParamInfo('part_chip_width', 256),
ut.ParamInfo('part_chip_height', 128),
ut.ParamInfo('colorspace', 'gray', valid_values=['gray', 'bgr', 'lab']),
]
# def __init__(pmcfg, **kwargs):
# #pmcfg.part_chip_width = 256
# #pmcfg.part_chip_height = 128
# #pmcfg.colorspace = 'gray'
# super(PartMatchDataConfig, pmcfg).__init__(**kwargs)
# def get_cfgstr(pmcfg):
# cfgstr_list = [
# 'sz=(%d,%d)' % (pmcfg.part_chip_width, pmcfg.part_chip_height),
# ]
# cfgstr_list.append(pmcfg.colorspace)
# return ','.join(cfgstr_list)
        def get_data_shape(pmcfg):
            channels = 1 if pmcfg['colorspace'] == 'gray' else 3
return (pmcfg['part_chip_height'], pmcfg['part_chip_width'], channels)
    class PatchMetricDataConfig(dtool.Config):
_param_info_list = [
ut.ParamInfo('patch_size', 64),
ut.ParamInfo('colorspace', 'gray', valid_values=['gray', 'bgr', 'lab']),
]
# def __init__(pmcfg, **kwargs):
# #pmcfg.patch_size = 64
# #pmcfg.colorspace = 'bgr'
# #pmcfg.colorspace = 'gray'
# super(PatchMetricDataConfig, pmcfg).__init__(**kwargs)
# def get_cfgstr(pmcfg):
# cfgstr_list = [
# 'patch_size=%d' % (pmcfg.patch_size,),
# ]
# #if pmcfg.colorspace != 'bgr':
# cfgstr_list.append(pmcfg.colorspace)
# return ','.join(cfgstr_list)
        def get_data_shape(pmcfg):
channels = 1 if pmcfg['colorspace'] == 'gray' else 3
return (pmcfg['patch_size'], pmcfg['patch_size'], channels)
except ImportError:
PartMatchDataConfig = None
PatchMetricDataConfig = None
def cached_part_match_training_data_fpaths(
ibs, aid_pairs, label_list, flat_metadata, **kwargs
):
r"""
CommandLine:
python -m wbia_cnn --tf netrun --db PZ_MTEST \
--acfg ctrl:pername=None,excluderef=False --ensuredata \
--show --datatype=siam-part \
--nocache-train --nocache-cnn
"""
NOCACHE_TRAIN = ut.get_argflag('--nocache-train')
pmcfg = PartMatchDataConfig(**kwargs)
data_shape = pmcfg.get_data_shape()
semantic_uuids1 = ibs.get_annot_semantic_uuids(aid_pairs.T[0])
semantic_uuids2 = ibs.get_annot_semantic_uuids(aid_pairs.T[1])
aidpair_hashstr_list = list(map(ut.hashstr27, zip(semantic_uuids1, semantic_uuids2)))
training_dname = ut.hashstr_arr27(
aidpair_hashstr_list, pathsafe=True, lbl='part_match'
)
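    # The cache directory name is a hash over the semantic uuids of the aid
    # pairs, so a different pair sampling writes to a different directory.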
nets_dir = ibs.get_neuralnet_dir()
training_dpath = ut.unixjoin(nets_dir, training_dname)
ut.ensuredir(nets_dir)
ut.ensuredir(training_dpath)
view_train_dir = ut.get_argflag('--vtd')
if view_train_dir:
ut.view_directory(training_dpath)
cfgstr = pmcfg.get_cfgstr()
data_fpath = ut.unixjoin(training_dpath, 'data_' + cfgstr + '.hdf5')
labels_fpath = ut.unixjoin(training_dpath, 'labels_' + cfgstr + '.hdf5')
metadata_fpath = ut.unixjoin(training_dpath, 'metadata_' + cfgstr + '.hdf5')
if NOCACHE_TRAIN or not (
ut.checkpath(data_fpath, verbose=True)
and ut.checkpath(labels_fpath, verbose=True)
and ut.checkpath(metadata_fpath, verbose=True)
):
estimate_data_bytes(len(aid_pairs), pmcfg.get_data_shape())
# Extract the data and labels
rchip1_list, rchip2_list = extract_annotpair_training_chips(
ibs, aid_pairs, **pmcfg
)
datagen_ = zip(rchip1_list, rchip2_list)
datagen = ut.ProgIter(
datagen_, nTotal=len(rchip1_list), lbl='Evaluating', adjust=False
)
data = np.array(list(ut.flatten(datagen)))
flat_metadata = ut.map_dict_vals(np.array, flat_metadata)
# img_list = ut.flatten(list(zip(rchip1_list,
# rchip2_list)))
# data = np.array(img_list)
# del img_list
labels = label_list
# data_per_label = 2
assert labels.shape[0] == data.shape[0] // 2
# data, labels, flat_metadata
# Save the data to cache
ut.assert_eq(data.shape[1], pmcfg['part_chip_height'])
ut.assert_eq(data.shape[2], pmcfg['part_chip_width'])
# TODO; save metadata
print('[write_part_data] np.shape(data) = %r' % (np.shape(data),))
print('[write_part_labels] np.shape(labels) = %r' % (np.shape(labels),))
# TODO hdf5 for large data
ut.save_hdf5(data_fpath, data)
ut.save_hdf5(labels_fpath, labels)
ut.save_hdf5(metadata_fpath, flat_metadata)
# ut.save_cPkl(data_fpath, data)
# ut.save_cPkl(labels_fpath, labels)
# ut.save_cPkl(metadata_fpath, flat_metadata)
else:
print('data and labels cache hit')
return data_fpath, labels_fpath, metadata_fpath, training_dpath, data_shape
def cached_patchmetric_training_data_fpaths(
ibs,
aid1_list,
aid2_list,
kpts1_m_list,
kpts2_m_list,
fm_list,
metadata_lists,
**kwargs
):
"""
todo use size in cfgstrings
kwargs is used for PatchMetricDataConfig
from wbia_cnn.ingest_wbia import *
"""
import utool as ut
pmcfg = PatchMetricDataConfig(**kwargs)
data_shape = pmcfg.get_data_shape()
NOCACHE_TRAIN = ut.get_argflag(('--nocache-train', '--nocache-cnn'))
semantic_uuids1 = ibs.get_annot_semantic_uuids(aid1_list)
semantic_uuids2 = ibs.get_annot_semantic_uuids(aid2_list)
aidpair_hashstr_list = list(map(ut.hashstr27, zip(semantic_uuids1, semantic_uuids2)))
training_dname = ut.hashstr_arr27(
aidpair_hashstr_list, pathsafe=True, lbl='patchmatch'
)
nets_dir = ibs.get_neuralnet_dir()
training_dpath = ut.unixjoin(nets_dir, training_dname)
ut.ensuredir(nets_dir)
ut.ensuredir(training_dpath)
view_train_dir = ut.get_argflag('--vtd')
if view_train_dir:
ut.view_directory(training_dpath)
fm_hashstr = ut.hashstr_arr27(np.vstack(fm_list), pathsafe=True, lbl='fm')
cfgstr = fm_hashstr + '_' + pmcfg.get_cfgstr()
data_fpath = ut.unixjoin(training_dpath, 'data_%s.hdf5' % (cfgstr,))
labels_fpath = ut.unixjoin(training_dpath, 'labels_%s.hdf5' % (cfgstr,))
metadata_fpath = ut.unixjoin(training_dpath, 'metadata_%s.hdf5' % (cfgstr,))
if NOCACHE_TRAIN or not (
ut.checkpath(data_fpath, verbose=True)
and ut.checkpath(labels_fpath, verbose=True)
and ut.checkpath(metadata_fpath, verbose=True)
):
estimate_data_bytes(sum(list(map(len, fm_list))), pmcfg.get_data_shape())
# Extract the data and labels
data, labels, flat_metadata = get_patchmetric_training_data_and_labels(
ibs,
aid1_list,
aid2_list,
kpts1_m_list,
kpts2_m_list,
fm_list,
metadata_lists,
**pmcfg
)
# Save the data to cache
ut.assert_eq(data.shape[1], pmcfg['patch_size'])
ut.assert_eq(data.shape[2], pmcfg['patch_size'])
# TODO; save metadata
print('[write_data_and_labels] np.shape(data) = %r' % (np.shape(data),))
print('[write_data_and_labels] np.shape(labels) = %r' % (np.shape(labels),))
# TODO hdf5 for large data
ut.save_hdf5(data_fpath, data)
ut.save_hdf5(labels_fpath, labels)
ut.save_hdf5(metadata_fpath, flat_metadata)
# ut.save_cPkl(data_fpath, data)
# ut.save_cPkl(labels_fpath, labels)
# ut.save_cPkl(metadata_fpath, flat_metadata)
else:
print('data and labels cache hit')
return data_fpath, labels_fpath, metadata_fpath, training_dpath, data_shape
def remove_unknown_training_pairs(ibs, aid1_list, aid2_list):
return aid1_list, aid2_list
def get_aidpairs_and_matches(
ibs,
max_examples=None,
num_top=3,
controlled=True,
min_featweight=None,
acfg_name=None,
):
r"""
Gets data for training a patch match network.
Args:
ibs (IBEISController): ibeis controller object
max_examples (None): (default = None)
num_top (int): (default = 3)
controlled (bool): (default = True)
Returns:
tuple : patchmatch_tup = (aid1_list, aid2_list, kpts1_m_list,
kpts2_m_list, fm_list, metadata_lists)
aid pairs and matching keypoint pairs as well as the original index
of the feature matches
CommandLine:
python -m wbia_cnn.ingest_wbia get_aidpairs_and_matches --show
python -m wbia_cnn.ingest_wbia --test-get_aidpairs_and_matches --db PZ_Master0
python -m wbia_cnn.ingest_wbia --test-get_aidpairs_and_matches --db PZ_Master1 --acfg default --show
python -m wbia_cnn.ingest_wbia --test-get_aidpairs_and_matches --db PZ_MTEST --acfg ctrl:qindex=0:10 --show
python -m wbia_cnn.ingest_wbia --test-get_aidpairs_and_matches --db NNP_Master3
python -m wbia_cnn.ingest_wbia --test-get_aidpairs_and_matches --db PZ_MTEST \
--acfg default:is_known=True,qmin_pername=2,view=primary,species=primary,minqual=ok --show
python -m wbia_cnn.ingest_wbia --test-get_aidpairs_and_matches --db PZ_Master1 \
--acfg default:is_known=True,qmin_pername=2,view=primary,species=primary,minqual=ok --show
Example:
>>> # DISABLE_DOCTEST
>>> from wbia_cnn.ingest_wbia import * # NOQA
>>> import wbia
>>> # build test data
>>> ibs = wbia.opendb(defaultdb='PZ_MTEST')
>>> acfg_name = ut.get_argval(('--aidcfg', '--acfg', '-a', '--acfg-name'),
... type_=str,
... default='ctrl:qindex=0:10')
>>> max_examples = None
>>> num_top = None
>>> controlled = True
>>> min_featweight = .99
>>> patchmatch_tup = get_aidpairs_and_matches(
>>> ibs, max_examples=max_examples, num_top=num_top,
>>> controlled=controlled, min_featweight=min_featweight,
>>> acfg_name=acfg_name)
>>> (aid1_list, aid2_list, kpts1_m_list, kpts2_m_list, fm_list, metadata_lists) = patchmatch_tup
>>> ut.quit_if_noshow()
>>> print('Visualizing')
>>> # Visualize feature scores
>>> tup = flatten_patch_data(
>>> ibs, aid1_list, aid2_list, kpts1_m_list, kpts2_m_list, fm_list,
>>> metadata_lists)
>>> labels, flat_metadata = tup[2:]
>>> fs = flat_metadata['fs']
>>> encoder = vt.ScoreNormalizer(adjust=1)
>>> encoder.fit(fs, labels)
>>> encoder.visualize(bin_width=.001, fnum=None)
>>> # Visualize parent matches
>>> _iter = list(zip(aid1_list, aid2_list, kpts1_m_list, kpts2_m_list, fm_list))
>>> _iter = ut.InteractiveIter(_iter, display_item=False)
>>> import plottool as pt
>>> import wbia.viz
>>> for aid1, aid2, kpts1, kpts2, fm in _iter:
>>> pt.reset()
>>> print('aid2 = %r' % (aid2,))
>>> print('aid1 = %r' % (aid1,))
>>> print('len(fm) = %r' % (len(fm),))
>>> wbia.viz.viz_matches.show_matches2(ibs, aid1, aid2, fm=None, kpts1=kpts1, kpts2=kpts2)
>>> pt.update()
>>> ut.show_if_requested()
"""
def get_query_results():
if acfg_name is not None:
print('NEW WAY OF FILTERING')
from wbia.expt import experiment_helpers
acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
ibs, [acfg_name]
)
# acfg = acfg_list[0]
expanded_aids = expanded_aids_list[0]
qaid_list, daid_list = expanded_aids
else:
print('OLD WAY OF FILTERING')
from wbia.other import ibsfuncs
if controlled:
# TODO: use acfg config
qaid_list = ibsfuncs.get_two_annots_per_name_and_singletons(
ibs, onlygt=True
)
daid_list = ibsfuncs.get_two_annots_per_name_and_singletons(
ibs, onlygt=False
)
else:
qaid_list = ibs.get_valid_aids()
# from wbia.algo.hots import chip_match
qaid_list = ut.compress(
qaid_list, ibs.get_annot_has_groundtruth(qaid_list)
)
daid_list = qaid_list
if max_examples is not None:
daid_list = daid_list[0 : min(max_examples, len(daid_list))]
if max_examples is not None:
qaid_list = qaid_list[0 : min(max_examples, len(qaid_list))]
cfgdict = {
'affine_invariance': False,
'fg_on': not ut.WIN32,
}
ibs.print_annotconfig_stats(qaid_list, daid_list, bigstr=True)
qreq_ = ibs.new_query_request(qaid_list, daid_list, cfgdict=cfgdict)
cm_list = qreq_.execute()
return cm_list, qreq_
cm_list, qreq_ = get_query_results()
def get_matchdata1():
# TODO: rectify with code in viz_nearest_descriptors to compute the flat lists
# Get aid pairs and feature matches
if num_top is None:
aids2_list = [cm.get_top_aids() for cm in cm_list]
else:
aids2_list = [cm.get_top_aids()[0:num_top] for cm in cm_list]
aids1_list = [[cm.qaid] * len(aids2) for cm, aids2 in zip(cm_list, aids2_list)]
aid1_list_all = np.array(ut.flatten(aids1_list))
aid2_list_all = np.array(ut.flatten(aids2_list))
def take_qres_list_attr(attr):
attrs_list = [
ut.dict_take(getattr(cm, attr), aids2)
for cm, aids2 in zip(cm_list, aids2_list)
]
attr_list = ut.flatten(attrs_list)
return attr_list
fm_list_all = take_qres_list_attr(attr='aid2_fm')
metadata_all = {}
filtkey_lists = ut.unique_unordered([tuple(cm.filtkey_list) for cm in cm_list])
        assert len(filtkey_lists) == 1, 'multiple filters used in this query'
filtkey_list = filtkey_lists[0]
fsv_list = take_qres_list_attr('aid2_fsv')
for index, key in enumerate(filtkey_list):
metadata_all[key] = [fsv.T[index] for fsv in fsv_list]
metadata_all['fs'] = take_qres_list_attr('aid2_fs')
if True:
neg_aid_pool = np.unique(ut.flatten([aid1_list_all, aid2_list_all]))
randneg_aid1 = []
randneg_aid2 = []
randneg_fm = []
rand_meta = {key: [] for key in metadata_all.keys()}
neg_nid_pool = np.array(ibs.get_annot_nids(neg_aid_pool))
rng = np.random.RandomState(0)
num_rand_neg_per_aid = 3
num_rand_fm = 30
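            # Synthesize random negative "matches": for each sampled
            # different-name pair, pick up to num_rand_fm foreground features
            # (fgweight above min_featweight) from each side, pair them
            # positionally, and record all-zero scores in the metadata.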
for aid, nid in ut.ProgIter(
list(zip(neg_aid_pool, neg_nid_pool)), 'sample aid rand'
):
# is_valid = get_badtag_flags(ibs, [aid] * len(neg_aid_pool), neg_aid_pool)
is_valid = np.not_equal(neg_nid_pool, nid)
# is_valid = np.logical_and(, is_valid)
p = is_valid / (is_valid.sum())
chosen = rng.choice(
neg_aid_pool, size=num_rand_neg_per_aid, replace=False, p=p
)
# chosen_pairs = list(ut.iprod([aid], chosen))
randneg_aid1.extend([aid] * len(chosen))
randneg_aid2.extend(chosen)
neg_fws1 = ibs.get_annot_fgweights(
randneg_aid1, config2_=qreq_.get_internal_query_config2()
)
neg_fws2 = ibs.get_annot_fgweights(
randneg_aid2, config2_=qreq_.get_internal_data_config2()
)
for fw1, fw2 in ut.ProgIter(list(zip(neg_fws1, neg_fws2)), 'sample fm rand'):
valid_fx1s = np.where(fw1 > min_featweight)[0]
valid_fx2s = np.where(fw2 > min_featweight)[0]
size = min(num_rand_fm, len(valid_fx1s), len(valid_fx2s))
if size > 0:
chosen_fx1 = rng.choice(valid_fx1s, size=size, replace=False)
chosen_fx2 = rng.choice(valid_fx2s, size=size, replace=False)
fm = np.vstack([chosen_fx1, chosen_fx2]).T
else:
                    fm = np.empty((0, 2), dtype=int)
randneg_fm.append(fm)
for key in rand_meta.keys():
rand_meta[key].append(np.array([0] * len(fm)))
prev_total = sum(map(len, fm_list_all))
adding = sum(map(len, randneg_fm))
print('prev_total = %r' % (prev_total,))
print('adding = %r' % (adding,))
metadata_all = ut.dict_isect_combine(metadata_all, rand_meta)
fm_list_all = fm_list_all + randneg_fm
aid1_list_all = np.append(aid1_list_all, randneg_aid1)
aid2_list_all = np.append(aid2_list_all, randneg_aid2)
# extract metadata (like feature scores and whatnot)
return aid1_list_all, aid2_list_all, fm_list_all, metadata_all
def get_badtag_flags(ibs, aid1_list, aid2_list):
from wbia import tag_funcs
tag_filter_kw = dict(
has_none=['photobomb', 'scenerymatch', 'joincase', 'splitcase']
)
am_rowids1 = ibs.get_annotmatch_rowid_from_undirected_superkey(
aid1_list, aid2_list
)
am_rowids2 = ibs.get_annotmatch_rowid_from_undirected_superkey(
aid2_list, aid1_list
)
case_tags1 = ibs.get_annotmatch_case_tags(am_rowids1)
case_tags2 = ibs.get_annotmatch_case_tags(am_rowids2)
flags1 = tag_funcs.filterflags_general_tags(case_tags1, **tag_filter_kw)
flags2 = tag_funcs.filterflags_general_tags(case_tags2, **tag_filter_kw)
flags_tag = ut.and_lists(flags1, flags2)
return flags_tag
def get_matchdata2():
aid1_list_all, aid2_list_all, fm_list_all, metadata_all = get_matchdata1()
# Filter out bad training examples
# (we are currently in annot-vs-annot format, not yet in patch-vs-patch)
labels_all = get_aidpair_training_labels(ibs, aid1_list_all, aid2_list_all)
has_gt = labels_all != ibs.const.REVIEW.UNKNOWN
nonempty = [len(fm) > 0 for fm in fm_list_all]
# Filter pairs bad pairs of aids
# using case tags
flags_tag = get_badtag_flags(ibs, aid1_list_all, aid2_list_all)
print(ut.filtered_infostr(flags_tag, 'annots', 'tag filters'))
flags = ut.and_lists(flags_tag, flags_tag)
#
MIN_TD = 5 * 60 # 5 minutes at least
timedelta_list = np.abs(ibs.get_annot_pair_timdelta(aid1_list_all, aid2_list_all))
# isnan = np.isnan(timedelta_list)
gf_tdflags = np.logical_or(
labels_all == ibs.const.REVIEW.MATCH, timedelta_list > MIN_TD
)
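        # Keep every true match; keep a non-match only when its two images
        # were taken at least MIN_TD apart, dropping near-simultaneous
        # non-match pairs.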
print(ut.filtered_infostr(gf_tdflags, 'gf annots', 'timestamp'))
flags = ut.and_lists(flags, gf_tdflags)
# Remove small time deltas
# --
print(ut.filtered_infostr(flags, 'total invalid annots'))
isvalid = np.logical_and(np.logical_and(has_gt, nonempty), flags)
aid1_list_uneq = ut.compress(aid1_list_all, isvalid)
aid2_list_uneq = ut.compress(aid2_list_all, isvalid)
labels_uneq = ut.compress(labels_all, isvalid)
fm_list_uneq = ut.compress(fm_list_all, isvalid)
metadata_uneq = {
key: ut.compress(vals, isvalid) for key, vals in metadata_all.items()
}
return aid1_list_uneq, aid2_list_uneq, labels_uneq, fm_list_uneq, metadata_uneq
def get_matchdata3():
# Filters in place
(
aid1_list_uneq,
aid2_list_uneq,
labels_uneq,
fm_list_uneq,
metadata_uneq,
) = get_matchdata2()
# min_featweight = None
if min_featweight is not None:
print('filter by featweight')
# Remove feature matches where the foreground weight is under a threshold
flags_list = []
for index in ut.ProgIter(range(len(aid1_list_uneq)), 'filt fw', adjust=True):
aid1 = aid1_list_uneq[index]
aid2 = aid2_list_uneq[index]
fm = fm_list_uneq[index]
fgweight1 = ibs.get_annot_fgweights(
[aid1], config2_=qreq_.get_internal_query_config2()
)[0][fm.T[0]]
fgweight2 = ibs.get_annot_fgweights(
[aid2], config2_=qreq_.get_internal_data_config2()
)[0][fm.T[1]]
flags = np.logical_and(
fgweight1 > min_featweight, fgweight2 > min_featweight
)
flags_list.append(flags)
print(
ut.filtered_infostr(ut.flatten(flags_list), 'feat matches', 'featweight')
)
fm_list_uneq2 = vt.zipcompress_safe(fm_list_uneq, flags_list, axis=0)
metadata_uneq2 = {
key: vt.zipcompress_safe(vals, flags_list, axis=0)
for key, vals in metadata_uneq.items()
}
else:
fm_list_uneq2 = fm_list_uneq
metadata_uneq2 = metadata_uneq
return aid1_list_uneq, aid2_list_uneq, labels_uneq, fm_list_uneq2, metadata_uneq2
def equalize_flat_flags(flat_labels, flat_scores):
labelhist = ut.dict_hist(flat_labels)
# Print input distribution of labels
print('[ingest_wbia] original label histogram = \n' + ut.dict_str(labelhist))
print('[ingest_wbia] total = %r' % (sum(list(labelhist.values()))))
pref_method = 'rand'
# pref_method = 'scores'
seed = 0
rng = np.random.RandomState(seed)
def pref_rand(type_indicies, min_, rng=rng):
return rng.choice(type_indicies, size=min_, replace=False)
def pref_first(type_indicies, min_):
return type_indicies[:min_]
def pref_scores(type_indicies, min_, flat_scores=flat_scores):
sortx = flat_scores.take(type_indicies).argsort()[::-1]
return type_indicies.take(sortx[:min_])
sample_func = {'rand': pref_rand, 'scores': pref_scores, 'first': pref_first}[
pref_method
]
# Figure out how much of each label needs to be removed
# record the indicies that will not be filtered in keep_indicies_list
allowed_ratio = ut.PHI * 0.8
# allowed_ratio = 1.0
# Find the maximum and minimum number of labels over all types
true_max_ = max(labelhist.values())
true_min_ = min(labelhist.values())
# Allow for some window around the minimum
min_ = min(int(true_min_ * allowed_ratio), true_max_)
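        # Cap every label type at roughly PHI * 0.8 (about 1.3x) the rarest
        # label's count, so classes end up approximately balanced without
        # discarding as much as a strict minimum would.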
print('Equalizing label distribution with method=%r' % (pref_method,))
print('Allowing at most %d labels of a type' % (min_,))
key_list, type_indicies_list = vt.group_indices(flat_labels)
# type_indicies_list = [np.where(flat_labels == key)[0]
# for key in six.iterkeys(labelhist)]
keep_indicies_list = []
for type_indicies in type_indicies_list:
if min_ >= len(type_indicies):
keep_indicies = type_indicies
else:
keep_indicies = sample_func(type_indicies, min_)
keep_indicies_list.append(keep_indicies)
# Create a flag for each flat label (patch-pair)
flat_keep_idxs = np.hstack(keep_indicies_list)
flat_flag_list = vt.index_to_boolmask(flat_keep_idxs, maxval=len(flat_labels))
return flat_flag_list
def equalize_labels():
(
aid1_list_uneq,
aid2_list_uneq,
labels_uneq,
fm_list_uneq2,
metadata_uneq2,
) = get_matchdata3()
print('flattening')
# Find out how many examples each source holds
len1_list = list(map(len, fm_list_uneq2))
# Expand source labels so one exists for each datapoint
flat_labels = ut.flatten(
[[label] * len1 for len1, label in zip(len1_list, labels_uneq)]
)
flat_labels = np.array(flat_labels)
flat_scores = np.hstack(metadata_uneq2['fs'])
flat_flag_list = equalize_flat_flags(flat_labels, flat_scores)
# Unflatten back into source-vs-source pairs (annot-vs-annot)
flags_list = ut.unflatten2(flat_flag_list, np.cumsum(len1_list))
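        # ut.unflatten2 splits the flat per-patch flags back into one list per
        # annot pair using the cumulative feature-match lengths.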
assert ut.depth_profile(flags_list) == ut.depth_profile(metadata_uneq2['fs'])
fm_list_ = vt.zipcompress_safe(fm_list_uneq2, flags_list, axis=0)
metadata_ = dict(
[
(key, vt.zipcompress_safe(vals, flags_list))
for key, vals in metadata_uneq2.items()
]
)
# remove empty aids
isnonempty_list = [len(fm) > 0 for fm in fm_list_]
fm_list_eq = ut.compress(fm_list_, isnonempty_list)
aid1_list_eq = ut.compress(aid1_list_uneq, isnonempty_list)
aid2_list_eq = ut.compress(aid2_list_uneq, isnonempty_list)
labels_eq = ut.compress(labels_uneq, isnonempty_list)
metadata_eq = dict(
[(key, ut.compress(vals, isnonempty_list)) for key, vals in metadata_.items()]
)
# PRINT NEW LABEL STATS
len1_list = list(map(len, fm_list_eq))
flat_labels_eq = ut.flatten(
[[label] * len1 for len1, label in zip(len1_list, labels_eq)]
)
labelhist_eq = {
key: len(val)
for key, val in six.iteritems(ut.group_items(flat_labels_eq, flat_labels_eq))
}
print('[ingest_wbia] equalized label histogram = \n' + ut.dict_str(labelhist_eq))
print('[ingest_wbia] total = %r' % (sum(list(labelhist_eq.values()))))
# --
return aid1_list_eq, aid2_list_eq, fm_list_eq, labels_eq, metadata_eq
# EQUALIZE_LABELS = True
# if EQUALIZE_LABELS:
aid1_list_eq, aid2_list_eq, fm_list_eq, labels_eq, metadata_eq = equalize_labels()
# Convert annot-vs-annot pairs into raw feature-vs-feature pairs
    print('Building feature indices')
fx1_list = [fm.T[0] for fm in fm_list_eq]
fx2_list = [fm.T[1] for fm in fm_list_eq]
# Hack: use the ibeis cache to make quick lookups
# with ut.Timer('Reading keypoint sets (caching unique keypoints)'):
# ibs.get_annot_kpts(list(set(aid1_list_eq + aid2_list_eq)),
# config2_=qreq_.get_internal_query_config2())
with ut.Timer('Reading keypoint sets from cache'):
kpts1_list = ibs.get_annot_kpts(
aid1_list_eq, config2_=qreq_.get_internal_query_config2()
)
kpts2_list = ibs.get_annot_kpts(
aid2_list_eq, config2_=qreq_.get_internal_query_config2()
)
# Save some memory
ibs.print_cachestats_str()
ibs.clear_table_cache(ibs.const.FEATURE_TABLE)
print('Taking matching keypoints')
kpts1_m_list = [kpts1.take(fx1, axis=0) for kpts1, fx1 in zip(kpts1_list, fx1_list)]
kpts2_m_list = [kpts2.take(fx2, axis=0) for kpts2, fx2 in zip(kpts2_list, fx2_list)]
(aid1_list, aid2_list, fm_list, metadata_lists) = (
aid1_list_eq,
aid2_list_eq,
fm_list_eq,
metadata_eq,
)
# assert ut.get_list_column(ut.depth_profile(kpts1_m_list), 0) ==
# ut.depth_profile(metadata_lists['fs'])
patchmatch_tup = (
aid1_list,
aid2_list,
kpts1_m_list,
kpts2_m_list,
fm_list,
metadata_lists,
)
return patchmatch_tup
def get_background_training_patches2(
ibs,
target_species,
dest_path=None,
patch_size=48,
patch_size_min=0.80,
patch_size_max=1.25,
annot_size=300,
patience=20,
patches_per_annotation=30,
global_limit=None,
train_gid_set=None,
visualize=False,
visualize_path=None,
tiles=False,
inside_boundary=True,
purge=False,
shuffle=True,
supercharge_negative_multiplier=2.0,
undercharge_negative_multiplier=0.5,
):
"""
    Get training patch data for the background classifier
"""
import random
from os.path import join, expanduser
import cv2
def resize_target(image, target_height=None, target_width=None):
assert target_height is not None or target_width is not None
height, width = image.shape[:2]
if target_height is not None and target_width is not None:
h = target_height
w = target_width
elif target_height is not None:
h = target_height
w = (width / height) * h
elif target_width is not None:
w = target_width
h = (height / width) * w
w, h = int(w), int(h)
return cv2.resize(image, (w, h), interpolation=cv2.INTER_LANCZOS4)
def point_inside(tup1, tup2):
(x, y) = tup1
(x0, y0, w, h) = tup2
x1 = x0 + w
y1 = y0 + h
return x0 <= x and x <= x1 and y0 <= y and y <= y1
if dest_path is None:
dest_path = expanduser(join('~', 'Desktop', 'extracted'))
if visualize_path is None:
visualize_path = expanduser(join('~', 'Desktop', 'visualize', 'background'))
if purge:
ut.delete(visualize_path)
ut.ensuredir(visualize_path)
dbname = ibs.dbname
name = 'background'
dbname = ibs.dbname
name_path = join(dest_path, name)
raw_path = join(name_path, 'raw')
labels_path = join(name_path, 'labels')
print(dest_path)
if purge:
ut.delete(dest_path)
ut.ensuredir(dest_path)
ut.ensuredir(raw_path)
ut.ensuredir(labels_path)
if train_gid_set is None:
train_gid_set = set(
ibs.get_imageset_gids(ibs.get_imageset_imgsetids_from_text('TRAIN_SET'))
)
if shuffle:
train_gid_set = list(train_gid_set)
random.shuffle(train_gid_set)
aids_list = ibs.get_image_aids(train_gid_set)
if tiles:
bboxes_list = [
ibs.get_annot_bboxes(aid_list, reference_tile_gid=gid)
for gid, aid_list in zip(train_gid_set, aids_list)
]
else:
bboxes_list = [ibs.get_annot_bboxes(aid_list) for aid_list in aids_list]
species_list_list = [ibs.get_annot_species_texts(aid_list) for aid_list in aids_list]
zipped = zip(train_gid_set, aids_list, bboxes_list, species_list_list)
label_list = []
global_positives = 0
global_negatives = 0
for gid, aid_list, bbox_list, species_list in zipped:
image = ibs.get_images(gid)
h, w, c = image.shape
if visualize:
canvas = image.copy()
else:
canvas = None
args = (
gid,
global_positives,
global_negatives,
len(label_list),
)
print('Processing GID: %r [ %r / %r = %r]' % args)
print('\tAIDS : %r' % (aid_list,))
print('\tBBOXES: %r' % (bbox_list,))
if global_limit is not None:
if global_negatives + global_positives >= global_limit:
print('\tHIT GLOBAL LIMIT')
continue
if len(aid_list) == 0 and len(bbox_list) == 0:
aid_list = [None]
bbox_list = [None]
for aid, bbox, species in zip(aid_list, bbox_list, species_list):
positives = 0
negatives = 0
if target_species == 'turtle_sea':
turtle_sea_species_list = [
'turtle_green',
'turtle_green+head',
'turtle_hawksbill',
'turtle_hawksbill+head',
'turtle_oliveridley',
'turtle_oliveridley+head',
'turtle_sea',
'turtle_sea+head',
]
if species not in turtle_sea_species_list:
print(
'Skipping aid %r (bad species: %s)'
% (
aid,
species,
)
)
continue
elif target_species == 'wild_dog':
wild_dog_species_list = [
'____',
'wild_dog',
'wild_dog_dark',
'wild_dog_light',
'wild_dog_puppy',
'wild_dog_standard',
'wild_dog_tan',
]
if species not in wild_dog_species_list:
print(
'Skipping aid %r (bad species: %s)'
% (
aid,
species,
)
)
continue
elif species != target_species:
print(
'Skipping aid %r (bad species: %s)'
% (
aid,
species,
)
)
continue
if aid is not None:
xtl, ytl, w_, h_ = bbox
xbr, ybr = xtl + w_, ytl + h_
if canvas is not None:
cv2.rectangle(canvas, (xtl, ytl), (xbr, ybr), (255, 0, 0))
if min(w_, h_) / max(w_, h_) <= 0.25:
print('Skipping aid %r (aspect ratio)' % (aid,))
continue
modifier = w_ / annot_size
patch_size_ = patch_size * modifier
patch_size_min_ = patch_size_ * patch_size_min
patch_size_max_ = patch_size_ * patch_size_max
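                # Scale the sampled patch size with the annotation's width
                # relative to the nominal annot_size; patches are resized back
                # to patch_size after cropping, so each covers a comparable
                # fraction of its annotation.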
for index in range(patches_per_annotation):
counter = 0
found = False
while not found and counter < patience:
counter += 1
patch_size_random = random.uniform(
patch_size_min_, patch_size_max_
)
patch_size_final = int(round(patch_size_random))
radius = patch_size_final // 2
if inside_boundary:
if patch_size_final > w_ or patch_size_final > h_:
print(
'Skipping aid %r (patch_size_final too big)' % (aid,)
)
continue
centerx = random.randint(xtl + radius, xbr - radius)
centery = random.randint(ytl + radius, ybr - radius)
else:
centerx = random.randint(xtl, xbr)
centery = random.randint(ytl, ybr)
x0 = centerx - radius
y0 = centery - radius
x1 = centerx + radius
y1 = centery + radius
if x0 < 0 or x0 >= w or x1 < 0 or x1 >= w:
print('Skipping aid %r (bounds check, width)' % (aid,))
continue
                        if y0 < 0 or y0 >= h or y1 < 0 or y1 >= h:
print('Skipping aid %r (bounds check, height)' % (aid,))
continue
found = True
# Sanity checks
try:
assert x1 > x0
assert y1 > y0
if inside_boundary:
assert x1 - x0 >= patch_size // 2
assert y1 - y0 >= patch_size // 2
assert x0 >= 0 and x0 < w and x1 >= 0 and x1 < w
assert y0 >= 0 and y0 < h and y1 >= 0 and y1 < h
except AssertionError:
print('Skipping aid %r (sanity check)' % (aid,))
found = False
if found:
positives += 1
if canvas is not None:
cv2.rectangle(canvas, (x0, y0), (x1, y1), (0, 255, 0))
chip = image[y0:y1, x0:x1]
chip = cv2.resize(
chip,
(patch_size, patch_size),
interpolation=cv2.INTER_LANCZOS4,
)
# positive_category = '%s' % (species, )
positive_category = 'positive'
values = (
dbname,
gid,
positive_category,
x0,
y0,
x1,
y1,
)
patch_filename = (
'%s_patch_gid_%s_%s_bbox_%d_%d_%d_%d.png' % values
)
patch_filepath = join(raw_path, patch_filename)
cv2.imwrite(patch_filepath, chip)
label = '%s,%s' % (patch_filename, positive_category)
label_list.append(label)
else:
print('Skipping aid %r (not found)' % (aid,))
positives_ = positives
else:
modifier = 4.0
patch_size_ = patch_size * modifier
patch_size_min_ = patch_size_ * patch_size_min
patch_size_max_ = patch_size_ * patch_size_max
positives_ = patches_per_annotation
delta = global_positives - global_negatives
if delta >= 2 * patches_per_annotation:
print('SUPERCHARGE NEGATIVES')
positives_ = int(positives_ * supercharge_negative_multiplier)
elif delta <= -2 * patches_per_annotation:
print('UNDERCHARGE NEGATIVES')
positives_ = int(positives_ * undercharge_negative_multiplier)
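            # The delta check above over-samples negatives when positives are
            # running ahead (supercharge) and under-samples them when negatives
            # are ahead (undercharge), keeping the two classes roughly balanced.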
for index in range(positives_):
counter = 0
found = False
while not found and counter < patience:
counter += 1
patch_size_random = random.uniform(patch_size_min_, patch_size_max_)
patch_size_final = int(round(patch_size_random))
radius = patch_size_final // 2
if radius >= w // 2 or radius >= h // 2:
continue
centerx = random.randint(radius, w - radius)
centery = random.randint(radius, h - radius)
inside = False
for bbox in bbox_list:
if bbox is None:
continue
if point_inside((centerx, centery), bbox):
inside = True
break
if inside:
continue
x0 = centerx - radius
y0 = centery - radius
x1 = centerx + radius
y1 = centery + radius
if x0 < 0 or x0 >= w or x1 < 0 or x1 >= w:
continue
                    if y0 < 0 or y0 >= h or y1 < 0 or y1 >= h:
continue
found = True
# Sanity checks
try:
assert x1 > x0
assert y1 > y0
assert x1 - x0 >= patch_size // 2
assert y1 - y0 >= patch_size // 2
assert x0 >= 0 and x0 < w and x1 >= 0 and x1 < w
assert y0 >= 0 and y0 < h and y1 >= 0 and y1 < h
except AssertionError:
found = False
if found:
negatives += 1
if canvas is not None:
cv2.rectangle(canvas, (x0, y0), (x1, y1), (0, 0, 255))
chip = image[y0:y1, x0:x1]
chip = cv2.resize(
chip, (patch_size, patch_size), interpolation=cv2.INTER_LANCZOS4
)
values = (
dbname,
gid,
'negative',
x0,
y0,
x1,
y1,
)
patch_filename = '%s_patch_gid_%s_%s_bbox_%d_%d_%d_%d.png' % values
patch_filepath = join(raw_path, patch_filename)
cv2.imwrite(patch_filepath, chip)
label = '%s,%s' % (patch_filename, 'negative')
label_list.append(label)
global_positives += positives
global_negatives += negatives
if canvas is not None:
canvas_filename = 'background_gid_%s_species_%s.png' % (
gid,
target_species,
)
canvas_filepath = join(visualize_path, canvas_filename)
            canvas_ = resize_target(canvas, target_width=1000)
            cv2.imwrite(canvas_filepath, canvas_)
args = (
global_positives,
global_negatives,
len(label_list),
)
print('Final Split: [ %r / %r = %r]' % args)
with open(join(labels_path, 'labels.csv'), 'a') as labels:
label_str = '\n'.join(label_list) + '\n'
labels.write(label_str)
return name_path
def get_aoi_training_data(ibs, dest_path=None, target_species_list=None, purge=True):
"""
    Get training data for the AOI (annotation of interest) classifier
"""
from os.path import join, expanduser
if dest_path is None:
dest_path = expanduser(join('~', 'Desktop', 'extracted'))
dbname = ibs.dbname
name = 'aoi'
dbname = ibs.dbname
name_path = join(dest_path, name)
raw_path = join(name_path, 'raw')
labels_path = join(name_path, 'labels')
if purge:
ut.delete(name_path)
ut.ensuredir(dest_path)
ut.ensuredir(name_path)
ut.ensuredir(raw_path)
ut.ensuredir(labels_path)
# gid_list = ibs.get_valid_gids()
train_gid_set = list(
set(ibs.get_imageset_gids(ibs.get_imageset_imgsetids_from_text('TRAIN_SET')))
)
config = {
'algo': 'resnet',
}
data_list = ibs.depc_image.get_property(
'features', train_gid_set, 'vector', config=config
)
reviewed_list = ibs.get_image_reviewed(train_gid_set)
reviewed_list = [True] * len(data_list)
aids_list = ibs.get_image_aids(train_gid_set)
size_list = ibs.get_image_sizes(train_gid_set)
bboxes_list = [ibs.get_annot_bboxes(aid_list) for aid_list in aids_list]
species_list_list = [ibs.get_annot_species_texts(aid_list) for aid_list in aids_list]
interest_list_list = [ibs.get_annot_interest(aid_list) for aid_list in aids_list]
if target_species_list is None:
target_species_list = list(set(ut.flatten(species_list_list)))
zipped = zip(
train_gid_set,
reviewed_list,
data_list,
aids_list,
size_list,
bboxes_list,
species_list_list,
interest_list_list,
)
label_list = []
for (
gid,
reviewed,
data,
aid_list,
(w, h),
bbox_list,
species_list,
interest_list,
) in zipped:
print('Processing GID: %r' % (gid,))
print('\tAIDS : %r' % (aid_list,))
print('\tBBOXES: %r' % (bbox_list,))
if reviewed in [None, 0]:
continue
w = float(w)
h = float(h)
temp_list = []
aoi_counter = 0
zipped = zip(aid_list, bbox_list, species_list, interest_list)
for aid, (xtl, ytl, width, height), species, interest in zipped:
if species not in target_species_list:
continue
if interest is None:
continue
aoi_flag = 1 if interest else 0
aoi_counter += aoi_flag
temp = [xtl / w, ytl / h, (xtl + width) / w, (ytl + height) / h, aoi_flag]
temp = list(map(str, map(float, temp)))
label = '^'.join(temp)
temp_list.append(label)
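            # Each annotation's record built above is xtl^ytl^xbr^ybr^aoi_flag
            # with coordinates normalized by the image width and height; the
            # records for one image are later joined with ';' and written after
            # the feature filename.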
if len(temp_list) == 0:
continue
if aoi_counter == 0:
continue
values = (
dbname,
gid,
aid,
)
feature_filename = '%s_vgg_feature_gid_%s_aid_%s.npy' % values
feature_filepath = join(raw_path, feature_filename)
        with open(feature_filepath, 'wb') as feature_file:
np.save(feature_file, data)
label = ';'.join(temp_list)
label = '%s,%s' % (feature_filename, label)
label_list.append(label)
with open(join(labels_path, 'labels.csv'), 'a') as labels:
label_str = '\n'.join(label_list) + '\n'
labels.write(label_str)
return name_path
def get_aoi2_training_data(
ibs,
image_size=192,
dest_path=None,
target_species_list=None,
train_gid_list=None,
purge=True,
cache=True,
):
"""
    Get training data for the AOI2 (annotation of interest) classifier
"""
from os.path import join, expanduser, exists
import cv2
if dest_path is None:
dest_path = expanduser(join('~', 'Desktop', 'extracted'))
dbname = ibs.dbname
name = 'aoi2'
dbname = ibs.dbname
name_path = join(dest_path, name)
raw_path = join(name_path, 'raw')
labels_path = join(name_path, 'labels')
if cache and exists(name_path):
print('Using cached exported data')
else:
if purge:
ut.delete(name_path)
ut.ensuredir(dest_path)
ut.ensuredir(name_path)
ut.ensuredir(raw_path)
ut.ensuredir(labels_path)
# gid_list = ibs.get_valid_gids()
if train_gid_list is None:
train_gid_set = set(
ibs.get_imageset_gids(ibs.get_imageset_imgsetids_from_text('TRAIN_SET'))
)
train_gid_list = list(train_gid_set)
# reviewed_list = ibs.get_image_reviewed(train_gid_list)
reviewed_list = [True] * len(train_gid_list)
aids_list = ibs.get_image_aids(train_gid_list)
size_list = ibs.get_image_sizes(train_gid_list)
bboxes_list = [ibs.get_annot_bboxes(aid_list) for aid_list in aids_list]
species_list_list = [
ibs.get_annot_species_texts(aid_list) for aid_list in aids_list
]
interest_list_list = [ibs.get_annot_interest(aid_list) for aid_list in aids_list]
if target_species_list is None:
target_species_list = list(set(ut.flatten(species_list_list)))
mask = np.zeros((image_size, image_size, 1))
zipped = zip(
train_gid_list,
reviewed_list,
aids_list,
size_list,
bboxes_list,
species_list_list,
interest_list_list,
)
label_list = []
for (
gid,
reviewed,
aid_list,
(w, h),
bbox_list,
species_list,
interest_list,
) in zipped:
print('Processing GID: %r' % (gid,))
print('\tAIDS : %r' % (aid_list,))
print('\tBBOXES: %r' % (bbox_list,))
if reviewed in [None, 0]:
continue
w = float(w)
h = float(h)
temp_list = []
aoi_counter = 0
zipped = zip(aid_list, bbox_list, species_list, interest_list)
for aid, (xtl, ytl, width, height), species, interest in zipped:
if species not in target_species_list:
continue
if interest is None:
continue
aoi_flag = 1 if interest else 0
aoi_counter += aoi_flag
temp = [xtl / w, ytl / h, (xtl + width) / w, (ytl + height) / h, aoi_flag]
temp = list(map(str, map(float, temp)))
label = '^'.join(temp)
temp_list.append(label)
if len(temp_list) == 0:
continue
# if aoi_counter == 0:
# continue
image = ibs.get_images(gid)
image_ = cv2.resize(
image, (image_size, image_size), interpolation=cv2.INTER_LANCZOS4
)
image_ = np.dstack((image_, mask))
values = (
dbname,
gid,
)
patch_filename = '%s_image_gid_%s.png' % values
patch_filepath = join(raw_path, patch_filename)
cv2.imwrite(patch_filepath, image_)
label = ';'.join(temp_list)
label = '%s,%s' % (patch_filename, label)
label_list.append(label)
with open(join(labels_path, 'labels.csv'), 'a') as labels:
label_str = '\n'.join(label_list) + '\n'
labels.write(label_str)
return name_path
def get_cnn_detector_training_images(ibs, dest_path=None, image_size=128):
from os.path import join, expanduser
import cv2
def resize_target(image, target_height=None, target_width=None):
assert target_height is not None or target_width is not None
height, width = image.shape[:2]
if target_height is not None and target_width is not None:
h = target_height
w = target_width
elif target_height is not None:
h = target_height
w = (width / height) * h
elif target_width is not None:
w = target_width
h = (height / width) * w
w, h = int(w), int(h)
return cv2.resize(image, (w, h))
dbname_mapping = {
'ELPH_Master': 'elephant_savanna',
'GIR_Master': 'giraffe_reticulated',
'GZ_Master': 'zebra_grevys',
'NNP_MasterGIRM': 'giraffe_masai',
'PZ_Master1': 'zebra_plains',
}
if dest_path is None:
dest_path = expanduser(join('~', 'Desktop', 'extracted'))
dbname = ibs.dbname
positive_category = dbname_mapping.get(dbname, 'positive')
name = 'saliency_detector'
dbname = ibs.dbname
name_path = join(dest_path, name)
raw_path = join(name_path, 'raw')
labels_path = join(name_path, 'labels')
# ut.remove_dirs(dest_path)
ut.ensuredir(dest_path)
ut.ensuredir(raw_path)
ut.ensuredir(labels_path)
# gid_list = ibs.get_valid_gids()
train_gid_set = set(
ibs.get_imageset_gids(ibs.get_imageset_imgsetids_from_text('TRAIN_SET'))
)
aids_list = ibs.get_image_aids(train_gid_set)
bboxes_list = [ibs.get_annot_bboxes(aid_list) for aid_list in aids_list]
label_list = []
zipped_list = zip(train_gid_set, aids_list, bboxes_list)
global_bbox_list = []
for gid, aid_list, bbox_list in zipped_list:
# if gid > 20:
# continue
image = ibs.get_images(gid)
height, width, channels = image.shape
args = (gid,)
print('Processing GID: %r' % args)
print('\tAIDS : %r' % (aid_list,))
print('\tBBOXES: %r' % (bbox_list,))
image_ = cv2.resize(
image, (image_size, image_size), interpolation=cv2.INTER_LANCZOS4
)
values = (
dbname,
gid,
)
patch_filename = '%s_image_gid_%s.png' % values
patch_filepath = join(raw_path, patch_filename)
cv2.imwrite(patch_filepath, image_)
bbox_list_ = []
for aid, (xtl, ytl, w, h) in zip(aid_list, bbox_list):
xr = round(w / 2)
yr = round(h / 2)
xc = xtl + xr
yc = ytl + yr
# Normalize to unit box
xr /= width
xc /= width
yr /= height
yc /= height
xr = min(1.0, max(0.0, xr))
xc = min(1.0, max(0.0, xc))
yr = min(1.0, max(0.0, yr))
yc = min(1.0, max(0.0, yc))
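            # Boxes are encoded as a center (xc, yc) and half-extent (xr, yr),
            # normalized to [0, 1] relative to the original image size and
            # clamped to the unit box.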
args = (
xc,
yc,
xr,
yr,
)
bbox_str = '%s:%s:%s:%s' % args
bbox_list_.append(bbox_str)
global_bbox_list.append(args)
# xtl_ = int((xc - xr) * image_size)
# ytl_ = int((yc - yr) * image_size)
# xbr_ = int((xc + xr) * image_size)
# ybr_ = int((yc + yr) * image_size)
# cv2.rectangle(image_, (xtl_, ytl_), (xbr_, ybr_), (0, 255, 0))
# cv2.imshow('', image_)
# cv2.waitKey(0)
aid_list_str = ';'.join(map(str, aid_list))
bbox_list_str = ';'.join(map(str, bbox_list_))
label = '%s,%s,%s,%s' % (
patch_filename,
positive_category,
aid_list_str,
bbox_list_str,
)
label_list.append(label)
with open(join(labels_path, 'labels.csv'), 'a') as labels:
label_str = '\n'.join(label_list) + '\n'
labels.write(label_str)
return global_bbox_list
def get_cnn_classifier_cameratrap_binary_training_images(
ibs,
positive_imageset_id,
negative_imageset_id,
dest_path=None,
image_size=192,
purge=True,
skip_rate=0.0,
skip_rate_pos=0.0,
skip_rate_neg=0.0,
):
from os.path import join, expanduser
import random
import cv2
if dest_path is None:
dest_path = expanduser(join('~', 'Desktop', 'extracted'))
name = 'classifier-cameratrap'
dbname = ibs.dbname
name_path = join(dest_path, name)
raw_path = join(name_path, 'raw')
labels_path = join(name_path, 'labels')
if purge:
ut.delete(name_path)
ut.ensuredir(name_path)
ut.ensuredir(raw_path)
ut.ensuredir(labels_path)
train_gid_set = set(
ibs.get_imageset_gids(ibs.get_imageset_imgsetids_from_text('TRAIN_SET'))
)
positive_gid_set = set(ibs.get_imageset_gids(positive_imageset_id))
negative_gid_set = set(ibs.get_imageset_gids(negative_imageset_id))
candidate_gid_set = positive_gid_set | negative_gid_set
candidate_gid_set = train_gid_set & candidate_gid_set
label_list = []
for gid in candidate_gid_set:
args = (gid,)
print('Processing GID: %r' % args)
if skip_rate > 0.0 and random.uniform(0.0, 1.0) <= skip_rate:
print('\t Skipping - Sampling')
continue
if gid in positive_gid_set:
category = 'positive'
elif gid in negative_gid_set:
category = 'negative'
else:
print('\t Skipping - No Label')
continue
if (
skip_rate_pos > 0.0
and category == 'positive'
and random.uniform(0.0, 1.0) <= skip_rate_pos
):
print('\t Skipping Positive')
continue
if (
skip_rate_neg > 0.0
and category == 'negative'
and random.uniform(0.0, 1.0) <= skip_rate_neg
):
print('\t Skipping Negative')
continue
image = ibs.get_images(gid)
image_ = cv2.resize(
image, (image_size, image_size), interpolation=cv2.INTER_LANCZOS4
)
values = (
dbname,
gid,
)
patch_filename = '%s_image_gid_%s.png' % values
patch_filepath = join(raw_path, patch_filename)
cv2.imwrite(patch_filepath, image_)
label = '%s,%s' % (
patch_filename,
category,
)
label_list.append(label)
with open(join(labels_path, 'labels.csv'), 'a') as labels:
label_str = '\n'.join(label_list) + '\n'
labels.write(label_str)
return name_path
def get_cnn_classifier_binary_training_images(
ibs,
category_list,
dest_path=None,
image_size=192,
purge=True,
skip_rate=0.0,
skip_rate_pos=0.0,
skip_rate_neg=0.0,
):
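"""
Exports resized whole images from TRAIN_SET for a binary classifier: an
image is 'positive' if any of its annotations has a species in
category_list, otherwise 'negative'.
Example (a minimal usage sketch; the species name is illustrative):
>>> # DISABLE_DOCTEST
>>> from wbia_cnn.ingest_wbia import * # NOQA
>>> import wbia
>>> ibs = wbia.opendb(defaultdb='PZ_MTEST')
>>> name_path = get_cnn_classifier_binary_training_images(ibs, ['zebra_plains'])
"""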
from os.path import join, expanduser
import random
import cv2
if dest_path is None:
dest_path = expanduser(join('~', 'Desktop', 'extracted'))
name = 'classifier-binary'
dbname = ibs.dbname
name_path = join(dest_path, name)
raw_path = join(name_path, 'raw')
labels_path = join(name_path, 'labels')
if purge:
ut.delete(name_path)
ut.ensuredir(name_path)
ut.ensuredir(raw_path)
ut.ensuredir(labels_path)
# gid_list = ibs.get_valid_gids()
train_gid_set = set(
ibs.get_imageset_gids(ibs.get_imageset_imgsetids_from_text('TRAIN_SET'))
)
aids_list = ibs.get_image_aids(train_gid_set)
category_set = set(category_list)
species_set_list = [
set(ibs.get_annot_species_texts(aid_list_)) for aid_list_ in aids_list
]
label_list = []
for gid, species_set in zip(train_gid_set, species_set_list):
args = (gid,)
print('Processing GID: %r' % args)
if skip_rate > 0.0 and random.uniform(0.0, 1.0) <= skip_rate:
print('\t Skipping')
continue
category = 'positive' if len(species_set & category_set) else 'negative'
if (
skip_rate_pos > 0.0
and category == 'positive'
and random.uniform(0.0, 1.0) <= skip_rate_pos
):
print('\t Skipping Positive')
continue
if (
skip_rate_neg > 0.0
and category == 'negative'
and random.uniform(0.0, 1.0) <= skip_rate_neg
):
print('\t Skipping Negative')
continue
image = ibs.get_images(gid)
image_ = cv2.resize(
image, (image_size, image_size), interpolation=cv2.INTER_LANCZOS4
)
values = (
dbname,
gid,
)
patch_filename = '%s_image_gid_%s.png' % values
patch_filepath = join(raw_path, patch_filename)
cv2.imwrite(patch_filepath, image_)
label = '%s,%s' % (
patch_filename,
category,
)
label_list.append(label)
with open(join(labels_path, 'labels.csv'), 'a') as labels:
label_str = '\n'.join(label_list) + '\n'
labels.write(label_str)
return name_path
def get_cnn_classifier2_training_images(
ibs,
category_set=None,
category_mapping=None,
dest_path=None,
train_gid_set=None,
image_size=192,
purge=True,
cache=True,
skip_rate=0.0,
):
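"""
Exports resized whole images for a multi-label classifier: each exported
image is labeled with the (optionally remapped) species present in it,
joined with ';', and the sorted category list is written to categories.csv.
Returns (name_path, category_list).
Example (a minimal usage sketch; the mapping is illustrative):
>>> # DISABLE_DOCTEST
>>> from wbia_cnn.ingest_wbia import * # NOQA
>>> import wbia
>>> ibs = wbia.opendb(defaultdb='PZ_MTEST')
>>> mapping = {'zebra_grevys': 'zebra'}
>>> name_path, categories = get_cnn_classifier2_training_images(ibs, category_mapping=mapping)
"""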
from os.path import join, expanduser, exists
import random
import cv2
# category_mapping defaults to None to avoid a shared mutable argument
if category_mapping is None:
category_mapping = {}
if dest_path is None:
dest_path = expanduser(join('~', 'Desktop', 'extracted'))
name = 'classifier2'
dbname = ibs.dbname
name_path = join(dest_path, name)
raw_path = join(name_path, 'raw')
labels_path = join(name_path, 'labels')
if train_gid_set is None:
train_gid_set = set(
ibs.get_imageset_gids(ibs.get_imageset_imgsetids_from_text('TRAIN_SET'))
)
aids_list = ibs.get_image_aids(train_gid_set)
species_set_list = [
set(ibs.get_annot_species_texts(aid_list_)) for aid_list_ in aids_list
]
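# When no category_set is given, default to every species that actually
# occurs in the TRAIN_SET annotations.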
if category_set is None:
category_set = map(list, species_set_list)
category_set = ut.flatten(category_set)
category_set = set(category_set)
category_list = list(sorted(category_set))
if cache and exists(name_path):
print('Using cached exported data')
else:
if purge:
ut.delete(name_path)
ut.ensuredir(name_path)
ut.ensuredir(raw_path)
ut.ensuredir(labels_path)
label_list = []
for gid, species_set in zip(train_gid_set, species_set_list):
args = (gid,)
print('Processing GID: %r' % args)
if skip_rate > 0.0 and random.uniform(0.0, 1.0) <= skip_rate:
print('\t Skipping')
continue
species_set = set(
[category_mapping.get(species, species) for species in species_set]
)
category_list_ = [
category for category in category_list if category in species_set
]
if len(category_list_) == 0:
print('\t Skipping (Categories)')
continue
image = ibs.get_images(gid)
image_ = cv2.resize(
image, (image_size, image_size), interpolation=cv2.INTER_LANCZOS4
)
values = (
dbname,
gid,
)
patch_filename = '%s_image_gid_%s.png' % values
patch_filepath = join(raw_path, patch_filename)
cv2.imwrite(patch_filepath, image_)
category = ';'.join(category_list_)
label = '%s,%s' % (
patch_filename,
category,
)
label_list.append(label)
with open(join(labels_path, 'labels.csv'), 'a') as labels:
label_str = '\n'.join(label_list) + '\n'
labels.write(label_str)
with open(join(labels_path, 'categories.csv'), 'a') as categories:
category_str = '\n'.join(category_list) + '\n'
categories.write(category_str)
return name_path, category_list
def get_cnn_labeler_training_images(
ibs,
dest_path=None,
image_size=128,
category_list=None,
min_examples=10,
category_mapping=None,
viewpoint_mapping=None,
purge=True,
strict=True,
skip_rate=0.0,
):
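"""
Exports resized annotation chips for labeler training. Species need at
least min_examples annotations to be kept; species that also have enough
examples for every recorded viewpoint are labeled 'species:viewpoint',
the rest are labeled with the species alone. category_mapping and
viewpoint_mapping rename species and viewpoints before counting.
Example (a minimal usage sketch; the argument values are illustrative):
>>> # DISABLE_DOCTEST
>>> from wbia_cnn.ingest_wbia import * # NOQA
>>> import wbia
>>> ibs = wbia.opendb(defaultdb='PZ_MTEST')
>>> name_path = get_cnn_labeler_training_images(ibs, min_examples=5)
"""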
from os.path import join, expanduser
import random
import cv2
if dest_path is None:
dest_path = expanduser(join('~', 'Desktop', 'extracted'))
name = 'labeler'
dbname = ibs.dbname
name_path = join(dest_path, name)
raw_path = join(name_path, 'raw')
labels_path = join(name_path, 'labels')
if purge:
ut.delete(name_path)
ut.ensuredir(name_path)
ut.ensuredir(raw_path)
ut.ensuredir(labels_path)
print('category mapping = %s' % (ut.repr3(category_mapping),))
print('viewpoint mapping = %s' % (ut.repr3(viewpoint_mapping),))
# train_gid_set = ibs.get_valid_gids()
train_gid_set = set(
ibs.get_imageset_gids(ibs.get_imageset_imgsetids_from_text('TRAIN_SET'))
)
aids_list = ibs.get_image_aids(train_gid_set)
# bboxes_list = [ ibs.get_annot_bboxes(aid_list) for aid_list in aids_list ]
# aid_list = ibs.get_valid_aids()
aid_list = ut.flatten(aids_list)
# import random
# random.shuffle(aid_list)
# aid_list = sorted(aid_list[:100])
species_list = ibs.get_annot_species_texts(aid_list)
if category_mapping is not None:
species_list = [
category_mapping.get(species, species) for species in species_list
]
# Guard: viewpoint_mapping defaults to None but is used with .get() below
if viewpoint_mapping is None:
viewpoint_mapping = {}
species_set = set(species_list)
yaw_list = ibs.get_annot_viewpoints(aid_list)
if category_list is None:
category_list = sorted(list(species_set))
undesired_list = [
'unspecified_animal',
ibs.get_species_nice(ibs.const.UNKNOWN_SPECIES_ROWID),
]
for undesired_species in undesired_list:
if undesired_species in category_list:
category_list.remove(undesired_species)
category_set = set(category_list)
# Filter the tup_list based on the requested categories
tup_list = list(zip(aid_list, species_list, yaw_list))
old_len = len(tup_list)
tup_list = [
(
aid,
species,
viewpoint_mapping.get(species, {}).get(yaw, yaw),
)
for aid, species, yaw in tup_list
if species in category_set
]
new_len = len(tup_list)
print(
'Filtered annotations: keep %d / original %d'
% (
new_len,
old_len,
)
)
# Count how many examples each species has overall and per viewpoint;
# annotations without a specified viewpoint are tallied separately
counter = 0
seen_dict = {}
yaw_dict = {}
for tup in tup_list:
aid, species, yaw = tup
# Keep track of the number of overall instances
if species not in seen_dict:
seen_dict[species] = 0
seen_dict[species] += 1
# Keep track of yaws that aren't None
if yaw is not None:
if species not in yaw_dict:
yaw_dict[species] = {}
if yaw not in yaw_dict[species]:
yaw_dict[species][yaw] = 0
yaw_dict[species][yaw] += 1
else:
counter += 1
# Get the list of species that do not have enough viewpoint examples for training
invalid_seen_set = set([])
invalid_yaw_set = set([])
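# invalid_seen_set: species with too few total examples.
# invalid_yaw_set (strict mode only): species with some viewpoint below
# min_examples, or with no recorded viewpoints at all.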
for species in seen_dict:
# Check that the number of instances is above the min_examples
if seen_dict[species] < min_examples:
invalid_seen_set.add(species)
continue
# If the species has viewpoints, check them as well
if strict:
if species in yaw_dict:
# Check that all viewpoints exist
# if len(yaw_dict[species]) < 8:
# invalid_yaw_set.add(species)
# continue
# Check that all viewpoints have a minimum number of instances
for yaw in yaw_dict[species]:
# assert yaw in ibs.const.VIEWTEXT_TO_YAW_RADIANS
if yaw_dict[species][yaw] < min_examples:
invalid_yaw_set.add(species)
continue
else:
invalid_yaw_set.add(species)
continue
print('Null yaws: %d' % (counter,))
valid_seen_set = category_set - invalid_seen_set
valid_yaw_set = valid_seen_set - invalid_yaw_set
print('Requested categories:')
category_set = sorted(category_set)
ut.print_list(category_set)
# print('Invalid yaw categories:')
# ut.print_list(sorted(invalid_yaw_set))
# print('Valid seen categories:')
# ut.print_list(sorted(valid_seen_set))
print('Valid yaw categories:')
valid_yaw_set = sorted(valid_yaw_set)
ut.print_list(valid_yaw_set)
print('Invalid seen categories (could not fulfill request):')
invalid_seen_set = sorted(invalid_seen_set)
ut.print_list(invalid_seen_set)
skipped_yaw = 0
skipped_seen = 0
tup_list_ = []
aid_list_ = []
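# Assign final labels: species with enough per-viewpoint examples become
# 'species:viewpoint' (annotations missing a viewpoint are skipped);
# species with only enough total examples become plain 'species';
# everything else is skipped.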
for tup in tup_list:
aid, species, yaw = tup
if species in valid_yaw_set:
# If the species is valid, but this specific annotation has no yaw, skip it
if yaw is None:
skipped_yaw += 1
continue
category = '%s:%s' % (
species,
yaw,
)
elif species in valid_seen_set:
category = '%s' % (species,)
else:
skipped_seen += 1
continue
tup_list_.append((tup, category))
aid_list_.append(aid)
print(
'Skipped Yaw: skipped %d / total %d'
% (
skipped_yaw,
len(tup_list),
)
)
print(
'Skipped Seen: skipped %d / total %d'
% (
skipped_seen,
len(tup_list),
)
)
# Precompute chips
ibs.compute_all_chips(aid_list_)
# Get training data
label_list = []
for tup, category in tup_list_:
aid, species, yaw = tup
args = (aid,)
print('Processing AID: %r' % args)
if skip_rate > 0.0 and random.uniform(0.0, 1.0) <= skip_rate:
print('\t Skipping')
continue
# Compute data
image = ibs.get_annot_chips(aid)
image_ = cv2.resize(
image, (image_size, image_size), interpolation=cv2.INTER_LANCZOS4
)
values = (
dbname,
aid,
)
patch_filename = '%s_annot_aid_%s.png' % values
patch_filepath = join(raw_path, patch_filename)
cv2.imwrite(patch_filepath, image_)
# Compute label
label = '%s,%s' % (
patch_filename,
category,
)
label_list.append(label)
print('Using labels for labeler training:')
label_list_ = set([_[1] for _ in tup_list_])
label_list_ = sorted(label_list_)
ut.print_list(label_list_)
with open(join(labels_path, 'labels.csv'), 'a') as labels:
label_str = '\n'.join(label_list) + '\n'
labels.write(label_str)
return name_path
def get_cnn_qualifier_training_images(ibs, dest_path=None, image_size=128, purge=True):
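"""
Exports resized chips of reviewed annotations for quality training; each
chip is labeled with its lowercased quality text.
Example (a minimal usage sketch):
>>> # DISABLE_DOCTEST
>>> from wbia_cnn.ingest_wbia import * # NOQA
>>> import wbia
>>> ibs = wbia.opendb(defaultdb='PZ_MTEST')
>>> get_cnn_qualifier_training_images(ibs)
"""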
from os.path import join
import cv2
if dest_path is None:
dest_path = ut.truepath('~/Desktop/extracted')
name = 'qualifier'
dbname = ibs.dbname
name_path = join(dest_path, name)
raw_path = join(name_path, 'raw')
labels_path = join(name_path, 'labels')
if purge:
ut.delete(name_path)
ut.ensuredir(name_path)
ut.ensuredir(raw_path)
ut.ensuredir(labels_path)
# gid_list = ibs.get_valid_gids()
train_gid_set = set(
ibs.get_imageset_gids(ibs.get_imageset_imgsetids_from_text('TRAIN_SET'))
)
aids_list = ibs.get_image_aids(train_gid_set)
# bboxes_list = [ ibs.get_annot_bboxes(aid_list) for aid_list in aids_list ]
# aid_list = ibs.get_valid_aids()
aid_list = ut.flatten(aids_list)
flag_list = ibs.get_annot_reviewed(aid_list)
aid_list = [aid for aid, flag in zip(aid_list, flag_list) if flag]
print('Outputting a total of %d annotations' % (len(aid_list),))
# import random
# random.shuffle(aid_list)
# aid_list = sorted(aid_list[:100])
quality_list = ibs.get_annot_quality_texts(aid_list)
label_list = []
for aid, quality in zip(aid_list, quality_list):
args = (aid,)
print('Processing AID: %r' % args)
image = ibs.get_annot_chips(aid)
image_ = cv2.resize(
image, (image_size, image_size), interpolation=cv2.INTER_LANCZOS4
)
values = (
dbname,
aid,
)
patch_filename = '%s_annot_aid_%s.png' % values
patch_filepath = join(raw_path, patch_filename)
cv2.imwrite(patch_filepath, image_)
category = quality.lower()
label = '%s,%s' % (
patch_filename,
category,
)
label_list.append(label)
with open(join(labels_path, 'labels.csv'), 'a') as labels:
label_str = '\n'.join(label_list) + '\n'
labels.write(label_str)
def get_orientation_training_images(ibs, dest_path=None, **kwargs):
"""
Gets data for training an orientation network. Extracts oriented chips for
the TRAIN_SET images via extract_orientation_chips and writes each chip to
disk along with a labels.csv row mapping the chip filename to its theta.
Args:
ibs (IBEISController): ibeis controller object
dest_path (str): output directory (default = ~/Desktop/extracted)
**kwargs: forwarded to extract_orientation_chips
Returns:
None: chips and labels are written under <dest_path>/orientation
CommandLine:
python -m wbia_cnn.ingest_wbia --test-get_orientation_training_images --dbdir /Datasets/BACKGROUND/PZ_Master1
Example:
>>> # ENABLE_DOCTEST
>>> from wbia_cnn.ingest_wbia import * # NOQA
>>> import wbia
>>> # build test data
>>> ibs = wbia.opendb(defaultdb='PZ_MTEST')
>>> get_orientation_training_images(ibs)
"""
from os.path import join, expanduser
import cv2
dbname_mapping = {
'ELPH_Master': 'elephant_savanna',
'GIR_Master': 'giraffe_reticulated',
'GZ_Master': 'zebra_grevys',
'NNP_MasterGIRM': 'giraffe_masai',
'PZ_Master1': 'zebra_plains',
}
if dest_path is None:
dest_path = expanduser(join('~', 'Desktop', 'extracted'))
dbname = ibs.dbname
positive_category = dbname_mapping.get(dbname, 'positive') # NOQA
name = 'orientation'
name_path = join(dest_path, name)
raw_path = join(name_path, 'raw')
labels_path = join(name_path, 'labels')
# ut.remove_dirs(dest_path)
ut.ensuredir(dest_path)
ut.ensuredir(raw_path)
ut.ensuredir(labels_path)
# gid_list = ibs.get_valid_gids()
train_gid_set = set(
ibs.get_imageset_gids(ibs.get_imageset_imgsetids_from_text('TRAIN_SET'))
)
vals = extract_orientation_chips(ibs, train_gid_set, **kwargs)
label_list = []
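# One labels.csv row per chip: '<chip_filename>,<theta>', where theta is
# the orientation value returned by extract_orientation_chips.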
zipped_list = zip(*vals)
for chip, theta, tag in zipped_list:
chip_filename = '%s.png' % (tag,)
chip_filepath = join(raw_path, chip_filename)
cv2.imwrite(chip_filepath, chip)
label = '%s,%s' % (chip_filename, theta)
label_list.append(label)
with open(join(labels_path, 'labels.csv'), 'a') as labels:
label_str = '\n'.join(label_list) + '\n'
labels.write(label_str)
if __name__ == '__main__':
"""
CommandLine:
python -m wbia_cnn.ingest_wbia
python -m wbia_cnn.ingest_wbia --allexamples
python -m wbia_cnn.ingest_wbia --allexamples --noface --nosrc
"""
import multiprocessing
multiprocessing.freeze_support() # for win32
import utool as ut # NOQA
ut.doctest_funcs()