21 #include "allheaders.h" 63 int x_scale,
int y_scale,
int num_features) {
65 stride_map.
ScaleXY(x_scale, y_scale);
66 ResizeToMap(src.int_mode_, stride_map, num_features);
73 ResizeToMap(src.int_mode_, stride_map, num_features);
80 for (
int t = 0; t < width; ++t) {
95 if (end_x < full_width) {
98 int fill_size = num_features * (full_width - end_x);
110 if (end_y < full_height) {
114 int fill_size = num_features * full_width * (full_height - end_y);
129 static void ComputeBlackWhite(Pix* pix,
float* black,
float* white) {
130 int width = pixGetWidth(pix);
131 int height = pixGetHeight(pix);
132 STATS mins(0, 256), maxes(0, 256);
135 l_uint32* line = pixGetData(pix) + pixGetWpl(pix) * y;
136 int prev = GET_DATA_BYTE(line, 0);
137 int curr = GET_DATA_BYTE(line, 1);
138 for (
int x = 1; x + 1 < width; ++x) {
139 int next = GET_DATA_BYTE(line, x + 1);
140 if ((curr < prev && curr <= next) || (curr <= prev && curr < next)) {
144 if ((curr > prev && curr >= next) || (curr >= prev && curr > next)) {
152 if (mins.get_total() == 0) mins.add(0, 1);
154 *black = mins.ile(0.25);
155 *white = maxes.
ile(0.75);
163 std::vector<const Pix*> pixes(1, pix);
171 const std::vector<const Pix*>& pixes,
173 int target_height = shape.
height();
174 int target_width = shape.
width();
175 std::vector<std::pair<int, int>> h_w_pairs;
176 for (
auto pix : pixes) {
177 Pix* var_pix =
const_cast<Pix*
>(pix);
178 int width = pixGetWidth(var_pix);
179 if (target_width != 0) width = target_width;
180 int height = pixGetHeight(var_pix);
181 if (target_height != 0) height = target_height;
182 h_w_pairs.emplace_back(height, width);
187 for (
size_t b = 0; b < pixes.size(); ++b) {
188 Pix* pix =
const_cast<Pix*
>(pixes[b]);
189 float black = 0.0f, white = 255.0f;
190 if (shape.
depth() != 3) ComputeBlackWhite(pix, &black, &white);
191 float contrast = (white - black) / 2.0
f;
192 if (contrast <= 0.0
f) contrast = 1.0f;
193 if (shape.
height() == 1) {
210 int width = pixGetWidth(pix);
211 int height = pixGetHeight(pix);
212 int wpl = pixGetWpl(pix);
219 bool color = num_features == 3;
220 if (width > target_width) width = target_width;
221 uinT32* line = pixGetData(pix);
222 for (
int y = 0; y < target_height; ++y, line += wpl) {
225 for (x = 0; x < width; ++x, ++t) {
228 for (
int c = COLOR_RED; c <= COLOR_BLUE; ++c) {
229 int pixel = GET_DATA_BYTE(line + x, c);
230 SetPixel(t, f++, pixel, black, contrast);
233 int pixel = GET_DATA_BYTE(line, x);
234 SetPixel(t, 0, pixel, black, contrast);
238 for (; x < target_width; ++x)
Randomize(t++, 0, num_features, randomizer);
247 float contrast,
TRand* randomizer) {
248 int width = pixGetWidth(pix);
249 int height = pixGetHeight(pix);
251 int wpl = pixGetWpl(pix);
256 if (width > target_width) width = target_width;
258 for (x = 0; x < width; ++x, ++t) {
259 for (
int y = 0; y < height; ++y) {
260 uinT32* line = pixGetData(pix) + wpl * y;
261 int pixel = GET_DATA_BYTE(line, x);
262 SetPixel(t, y, pixel, black, contrast);
265 for (; x < target_width; ++x)
Randomize(t++, 0, height, randomizer);
276 float float_pixel = (pixel - black) / contrast - 1.0f;
281 f_[t][
f] = float_pixel;
291 int feature_factor = 1;
292 if (num_features == 3) {
297 Pix* pix = pixCreate(im_width, im_height * num_features, 32);
306 for (
int y = 0; y < num_features; ++y, im_y += im_height) {
307 int pixel = features[y * feature_factor];
310 int green = red, blue = red;
311 if (feature_factor == 3) {
313 green =
ClipToRange(features[y * feature_factor + 1] + 128, 0, 255);
314 blue =
ClipToRange(features[y * feature_factor + 2] + 128, 0, 255);
315 }
else if (num_features > 3) {
318 red = abs(pixel) * 2;
327 pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) |
328 (green << L_GREEN_SHIFT) |
329 (blue << L_BLUE_SHIFT));
333 for (
int y = 0; y < num_features; ++y, im_y += im_height) {
334 float pixel = features[y * feature_factor];
337 int green = red, blue = red;
338 if (feature_factor == 3) {
340 pixel = features[y * feature_factor + 1];
342 pixel = features[y * feature_factor + 2];
344 }
else if (num_features > 3) {
356 pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) |
357 (green << L_GREEN_SHIFT) |
358 (blue << L_BLUE_SHIFT));
368 for (
int y = 0; y < num_features; ++y) {
369 for (
int t = 0; t <
Width(); ++t) {
370 if (num == 0 || t < num || t + num >=
Width()) {
386 memcpy(i_[dest_t], src.i_[src_t], i_.
dim2() *
sizeof(i_[0][0]));
388 memcpy(f_[dest_t], src.f_[src_t], f_.
dim2() *
sizeof(f_[0][0]));
395 int src_t,
int src_offset) {
398 memcpy(i_[dest_t] + dest_offset, src.i_[src_t] + src_offset,
399 num_features *
sizeof(i_[0][0]));
401 memcpy(f_[dest_t] + dest_offset, src.f_[src_t] + src_offset,
402 num_features *
sizeof(f_[0][0]));
420 for (
int i = 0;
i < num_features; ++
i)
424 float* line = f_[t] +
offset;
425 for (
int i = 0;
i < num_features; ++
i)
432 int null_ch,
float* rating,
433 float* certainty)
const {
434 if (t_end <= t_start)
return -1;
436 float min_score = 0.0f;
438 if (c == not_this || c == null_ch)
continue;
440 if (max_char < 0 || *rating < min_score) {
451 float* rating,
float* certainty)
const {
455 if (t_end <= t_start || t_end <= 0)
return;
456 float ratings[3] = {0.0f, 0.0f, 0.0f};
457 float certs[3] = {0.0f, 0.0f, 0.0f};
458 for (
int t = t_start; t < t_end; ++t) {
459 const float* line = f_[t];
467 for (
int i = 2;
i >= 1; --
i) {
468 if (ratings[
i] > ratings[
i - 1]) {
469 ratings[
i] = ratings[
i - 1];
470 certs[
i] = certs[
i - 1];
474 if (zero < certs[2]) certs[2] = zero;
476 if (score < certs[1]) certs[1] = score;
479 if (zero < certs[0]) certs[0] = zero;
481 int best_i = ratings[2] < ratings[1] ? 2 : 1;
482 *rating = ratings[best_i] + t_end - t_start;
483 *certainty = certs[best_i];
490 float* score)
const {
494 const float* line = f_[t];
495 for (
int i = 0;
i < f_.
dim2(); ++
i) {
496 if (line[
i] > best_score &&
i != not_this &&
i != not_that) {
497 best_score = line[
i];
509 int length = labels.
size();
510 int last_start = end - length;
512 double best_score = 0.0;
513 for (
int s = start; s <= last_start; ++s) {
515 if (score > best_score || best_start < 0) {
527 int length = labels.
size();
529 for (
int i = 0;
i < length; ++
i) {
530 score += f_(start +
i, labels[
i]);
540 float bad_score = (1.0f - ok_score) / (num_classes - 1);
541 float* targets = f_[t];
542 for (
int i = 0;
i < num_classes; ++
i)
543 targets[
i] = bad_score;
544 targets[label] = ok_score;
555 float* targets = f_[t];
556 for (
int c = 0; c < num_classes; ++c) {
558 targets[c] += (1.0 - targets[c]) * (2 / 3.0);
581 for (
int t = 0; t <
Width(); ++t) {
583 for (
int y = 0; y < num_features; ++y) {
584 float grad = features[y];
585 if (grad < -confidence_thr) {
587 if ((t == 0 || f_[t - 1][y] < confidence_thr / 2) &&
588 (t + 1 ==
Width() || f_[t + 1][y] < confidence_thr / 2)) {
600 const inT8* line = i_[t];
601 for (
int i = 0;
i < i_.
dim2(); ++
i) {
602 output[
i] =
static_cast<double>(line[
i]) /
MAX_INT8;
605 const float* line = f_[t];
606 for (
int i = 0;
i < f_.
dim2(); ++
i) {
607 output[
i] =
static_cast<double>(line[
i]);
616 const inT8* line = i_[t];
617 for (
int i = 0;
i < num_features; ++
i) {
618 inout[
i] +=
static_cast<double>(line[
i]) /
MAX_INT8;
621 const float* line = f_[t];
622 for (
int i = 0;
i < num_features; ++
i) {
630 float* inout)
const {
633 for (
int i = 0;
i < num_features; ++
i) {
634 inout[
i] +=
static_cast<float>(line[
i]) /
MAX_INT8;
637 const float* line = f_[t] +
offset;
638 for (
int i = 0;
i < num_features; ++
i) {
652 const double* input) {
655 for (
int i = 0;
i < num_features; ++
i) {
657 -MAX_INT8, MAX_INT8);
660 float* line = f_[t] +
offset;
661 for (
int i = 0;
i < num_features; ++
i) {
662 line[
i] =
static_cast<float>(input[
i]);
673 inT8* dest_line = i_[dest_t];
674 const inT8* src_line = src.i_[src_t];
675 for (
int i = 0;
i < dim; ++
i) {
676 if (dest_line[
i] < src_line[
i]) {
677 dest_line[
i] = src_line[
i];
683 float* dest_line = f_[dest_t];
684 const float* src_line = src.f_[src_t];
685 for (
int i = 0;
i < dim; ++
i) {
686 if (dest_line[
i] < src_line[
i]) {
687 dest_line[
i] = src_line[
i];
702 const int* max_line = maxes[t];
703 const float* fwd_line = fwd.f_[t];
704 int num_features = fwd.f_.
dim2();
705 for (
int i = 0;
i < num_features; ++
i) {
706 f_[max_line[
i]][
i] = fwd_line[
i];
713 float min_max = 0.0f;
716 for (
int t = 0; t < width; ++t) {
719 const inT8* column = i_[t];
720 for (
int i = 0;
i < num_features; ++
i) {
721 if (column[
i] > max_value) max_value = column[
i];
724 const float* column = f_[t];
725 for (
int i = 0;
i < num_features; ++
i) {
726 if (column[
i] > max_value) max_value = column[
i];
729 if (t == 0 || max_value < min_max) min_max = max_value;
744 for (
int t = 0; t < width; ++t) {
745 inT8* out_line = i_[t];
746 const inT8* base_line = base_output.i_[t];
747 const inT8* comb_line = combiner_output.i_[t];
748 float base_weight =
static_cast<float>(comb_line[no]) /
MAX_INT8;
749 float boost_weight = 1.0f - base_weight;
750 for (
int i = 0;
i < no; ++
i) {
752 comb_line[
i] * boost_weight);
756 for (
int t = 0; t < width; ++t) {
757 float* out_line = f_[t];
758 const float* base_line = base_output.f_[t];
759 const float* comb_line = combiner_output.f_[t];
760 float base_weight = comb_line[no];
761 float boost_weight = 1.0f - base_weight;
762 for (
int i = 0;
i < no; ++
i) {
763 out_line[
i] = base_line[
i] * base_weight + comb_line[
i] * boost_weight;
779 for (
int t = 0; t < width; ++t) {
780 const float* delta_line = fwd_deltas.f_[t];
781 const float* base_line = base_output.f_[t];
782 float* comb_line = f_[t];
783 float base_weight = comb_line[no];
784 float boost_weight = 1.0f - base_weight;
785 float max_base_delta = 0.0;
786 for (
int i = 0;
i < no; ++
i) {
788 float output = base_line[
i] * base_weight + comb_line[
i] * boost_weight;
790 float comb_target = delta_line[
i] + output;
791 comb_line[
i] = comb_target - comb_line[
i];
792 float base_delta = fabs(comb_target - base_line[
i]);
793 if (base_delta > max_base_delta) max_base_delta = base_delta;
795 if (max_base_delta >= 0.5) {
798 comb_line[no] = 0.0 - base_weight;
801 for (
int i = 0;
i < no; ++
i) {
803 if (comb_line[
i] > 0.0) comb_line[
i] -= 1.0;
805 comb_line[no] = 1.0 - base_weight;
836 float src_max = src.f_.
MaxAbs();
838 float scale_max = scale.f_.
MaxAbs();
840 if (src_max > 0.0
f) {
841 float factor = scale_max / src_max;
842 for (
int t = 0; t < src.
Width(); ++t) {
843 const float* src_ptr = src.f_[t];
844 float* dest_ptr = f_[t];
845 for (
int i = 0;
i < src.f_.
dim2(); ++
i) dest_ptr[
i] = src_ptr[
i] * factor;
855 Resize(src, num_features);
863 int fwd_t = fwd_index.
t();
864 int rev_t = rev_index.
t();
874 Resize(src, num_features);
893 stride_map_ = src.stride_map_;
910 }
while (src_b_index.AddOffset(1,
FD_BATCH) &&
919 int width = src.
Width();
924 for (
int t = 0; t < width; ++t) {
925 memcpy(i_[t] + feature_offset, src.i_[t],
926 num_features *
sizeof(i_[t][0]));
928 for (
int t = width; t < i_.
dim1(); ++t) {
929 memset(i_[t], 0, num_features *
sizeof(i_[t][0]));
932 for (
int t = 0; t < width; ++t) {
933 memcpy(f_[t] + feature_offset, src.f_[t],
934 num_features *
sizeof(f_[t][0]));
936 for (
int t = width; t < f_.
dim1(); ++t) {
937 memset(f_[t], 0, num_features *
sizeof(f_[t][0]));
940 return num_features + feature_offset;
947 Resize(src, num_features);
948 int width = src.
Width();
951 for (
int t = 0; t < width; ++t) {
952 memcpy(i_[t], src.i_[t] + feature_offset,
953 num_features *
sizeof(i_[t][0]));
956 for (
int t = 0; t < width; ++t) {
957 memcpy(f_[t], src.f_[t] + feature_offset,
958 num_features *
sizeof(f_[t][0]));
967 for (
int t = 0; t < width; ++t) dest->
WriteStrided(t, f_[t]);
975 for (
int i = 0;
i < dim; ++
i)
void CopyUnpacking(const NetworkIO &src, int feature_offset, int num_features)
void CopyTimeStepFrom(int dest_t, const NetworkIO &src, int src_t)
void ZeroInvalidElements()
bool AddOffset(int offset, FlexDimensions dimension)
void ReadTimeStep(int t, double *output) const
bool AnySuspiciousTruth(float confidence_thr) const
int index(FlexDimensions dimension) const
void MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line)
void Resize2d(bool int_mode, int width, int num_features)
void FromPix(const StaticShape &shape, const Pix *pix, TRand *randomizer)
int BestLabel(int t, float *score) const
int MaxIndexOfDim(FlexDimensions dim) const
void AddTimeStep(int t, double *inout) const
void SubtractAllFromFloat(const NetworkIO &src)
double ScoreOfLabels(const GenericVector< int > &labels, int start) const
void SetActivations(int t, int label, float ok_score)
void CopyWithXYTranspose(const NetworkIO &src)
void Resize(const NetworkIO &src, int num_features)
void WriteStrided(int t, const float *data)
int IntCastRounded(double x)
const float kMinCertainty
void ResizeScaled(const NetworkIO &src, int x_scale, int y_scale, int num_features)
void ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating, float *certainty) const
int CopyPacking(const NetworkIO &src, int feature_offset)
void AddAllToFloat(const NetworkIO &src)
int Size(FlexDimensions dimension) const
void MaxpoolBackward(const NetworkIO &fwd, const GENERIC_2D_ARRAY< int > &maxes)
void EnsureBestLabel(int t, int label)
void Copy2DImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer)
void CopyAll(const NetworkIO &src)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
void CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features, const NetworkIO &src, int src_t, int src_offset)
void add(inT32 value, inT32 count)
void CopyWithNormalization(const NetworkIO &src, const NetworkIO &scale)
void ZeroTimeStepGeneral(int t, int offset, int num_features)
void ResizeNoInit(int size1, int size2)
void AddTimeStepPart(int t, int offset, int num_features, float *inout) const
void ResizeXTo1(const NetworkIO &src, int num_features)
static float ProbToCertainty(float prob)
void CombineOutputs(const NetworkIO &base_output, const NetworkIO &combiner_output)
void Print(int num) const
const inT8 * i(int t) const
const StrideMap & stride_map() const
void Copy1DGreyImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer)
void CopyWithXReversal(const NetworkIO &src)
void Randomize(int t, int offset, int num_features, TRand *randomizer)
double ile(double frac) const
void ZeroVector(int n, T *vec)
void CopyWithYReversal(const NetworkIO &src)
void WriteTimeStep(int t, const double *input)
void ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const NetworkIO &base_output)
void Transpose(TransposedArray *dest) const
double SignedRand(double range)
void ResizeToMap(bool int_mode, const StrideMap &stride_map, int num_features)
void ClipVector(int t, float range)
void WriteTimeStepPart(int t, int offset, int num_features, const double *input)
int PositionOfBestMatch(const GenericVector< int > &labels, int start, int end) const
void ScaleXY(int x_factor, int y_factor)
void SetStride(const std::vector< std::pair< int, int >> &h_w_pairs)
void SetPixel(int t, int f, int pixel, float black, float contrast)
void FromPixes(const StaticShape &shape, const std::vector< const Pix *> &pixes, TRand *randomizer)
int BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating, float *certainty) const