// Copies the whole input, transposed, into *this.
void TransposedArray::Transpose(const GENERIC_2D_ARRAY<double>& input) {
  int width = input.dim1();
  int num_features = input.dim2();
  Resize(num_features, width, 0.0);
  for (int t = 0; t < width; ++t) WriteStrided(t, input[t]);
}
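// Example (illustrative, not part of the original file): transposing a 2x3
// array yields a 3x2 TransposedArray, so that output[j][i] == input[i][j].
// Dimensions and values here are hypothetical.
static void TransposeExample() {
  GENERIC_2D_ARRAY<double> a;
  a.Resize(2, 3, 0.0);
  a[0][0] = 1.0; a[0][1] = 2.0; a[0][2] = 3.0;
  a[1][0] = 4.0; a[1][1] = 5.0; a[1][2] = 6.0;
  TransposedArray at;
  at.Transpose(a);  // at is now 3x2: at[1][0] == 2.0, at[2][1] == 6.0.
}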
// Sets up the network for training. Initializes the weights to small
// signed random values of scale weight_range.
int WeightMatrix::InitWeightsFloat(int no, int ni, bool ada_grad,
                                   float weight_range, TRand* randomizer) {
  int_mode_ = false;
  wf_.Resize(no, ni, 0.0);
  if (randomizer != NULL) {
    for (int i = 0; i < no; ++i) {
      for (int j = 0; j < ni; ++j) {
        wf_[i][j] = randomizer->SignedRand(weight_range);
      }
    }
  }
  use_ada_grad_ = ada_grad;
  InitBackward();
  return ni * no;
}
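// Usage sketch (illustrative; layer sizes and seed are hypothetical, and
// TRand::set_seed is assumed from helpers.h): creates a 4x3 float weight
// matrix with weights uniform in [-0.1, 0.1].
static void InitExample() {
  WeightMatrix w;
  TRand randomizer;
  randomizer.set_seed(0x12345678);
  w.InitWeightsFloat(/*no=*/4, /*ni=*/3, /*ada_grad=*/false,
                     /*weight_range=*/0.1f, &randomizer);
}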
// Converts a float network to an int network. Each set of input weights that
// corresponds to a single output weight is converted independently: compute
// the max absolute value of the weight set, scale so the max becomes
// MAX_INT8, round to integer, and store a multiplicative scale factor that
// reproduces the original value, subject to rounding errors.
void WeightMatrix::ConvertToInt() {
  wi_.ResizeNoInit(wf_.dim1(), wf_.dim2());
  scales_.init_to_size(wi_.dim1(), 0.0);
  int dim2 = wi_.dim2();
  for (int t = 0; t < wi_.dim1(); ++t) {
    double* f_line = wf_[t];
    inT8* i_line = wi_[t];
    double max_abs = 0.0;
    for (int f = 0; f < dim2; ++f) {
      double abs_val = fabs(f_line[f]);
      if (abs_val > max_abs) max_abs = abs_val;
    }
    double scale = max_abs / MAX_INT8;
    scales_[t] = scale;
    if (scale == 0.0) scale = 1.0;
    for (int f = 0; f < dim2; ++f) {
      i_line[f] = IntCastRounded(f_line[f] / scale);
    }
  }
  wf_.Resize(1, 1, 0.0);  // Frees the float weights; only wi_ is used now.
  int_mode_ = true;
}
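// Worked example of the quantization above (numbers are illustrative): for a
// row whose largest |weight| is 0.5, with MAX_INT8 == 127:
//   scale = 0.5 / 127 ≈ 0.003937
//   weight 0.25 -> IntCastRounded(0.25 / scale) = IntCastRounded(63.5) = 64
//   dequantized: 64 * scale ≈ 0.2520, i.e. a rounding error of about 0.002.
// The per-row scale is what MatrixDotVector later multiplies back in.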
// Allocates any needed memory for running Backward, and zeroes the deltas,
// thus eliminating any existing momentum.
void WeightMatrix::InitBackward() {
  int no = int_mode_ ? wi_.dim1() : wf_.dim1();
  int ni = int_mode_ ? wi_.dim2() : wf_.dim2();
  dw_.Resize(no, ni, 0.0);
  updates_.Resize(no, ni, 0.0);
  wf_t_.Transpose(wf_);
  if (use_ada_grad_) dw_sq_sum_.Resize(no, ni, 0.0);
}
// Flag on mode to indicate that this weightmatrix uses inT8.
const int kInt8Flag = 1;
// Flag on mode to indicate that this weightmatrix uses AdaGrad.
const int kAdaGradFlag = 4;
// Flag on mode to indicate that this weightmatrix uses double. Set
// independently of kInt8Flag, as even in int mode the scales are double.
const int kDoubleFlag = 128;

// Writes to the given file. Returns false in case of error.
bool WeightMatrix::Serialize(bool training, TFile* fp) const {
  // Always add kDoubleFlag to mode, so readers can detect the doubles format.
  uinT8 mode = (int_mode_ ? kInt8Flag : 0) |
               (use_ada_grad_ ? kAdaGradFlag : 0) | kDoubleFlag;
  if (fp->FWrite(&mode, sizeof(mode), 1) != 1) return false;
  if (int_mode_) {
    if (!wi_.Serialize(fp)) return false;
    if (!scales_.Serialize(fp)) return false;
  } else {
    if (!wf_.Serialize(fp)) return false;
    if (training && !updates_.Serialize(fp)) return false;
    if (training && use_ada_grad_ && !dw_sq_sum_.Serialize(fp)) return false;
  }
  return true;
}
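// Worked example of the mode byte (values per the flag constants above): an
// int-mode matrix trained with AdaGrad serializes
//   mode = kInt8Flag | kAdaGradFlag | kDoubleFlag = 1 | 4 | 128 = 133 (0x85),
// while a plain float matrix writes just kDoubleFlag = 128 (0x80).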
// Reads from the given file. Returns false in case of error.
bool WeightMatrix::DeSerialize(bool training, TFile* fp) {
  uinT8 mode = 0;
  if (fp->FRead(&mode, sizeof(mode), 1) != 1) return false;
  int_mode_ = (mode & kInt8Flag) != 0;
  use_ada_grad_ = (mode & kAdaGradFlag) != 0;
  // Old (pre-double) files lack kDoubleFlag and use the float reader.
  if ((mode & kDoubleFlag) == 0) return DeSerializeOld(training, fp);
  if (int_mode_) {
    if (!wi_.DeSerialize(fp)) return false;
    if (!scales_.DeSerialize(fp)) return false;
  } else {
    if (!wf_.DeSerialize(fp)) return false;
    if (training) {
      InitBackward();
      if (!updates_.DeSerialize(fp)) return false;
      if (use_ada_grad_ && !dw_sq_sum_.DeSerialize(fp)) return false;
    }
  }
  return true;
}
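// Round-trip sketch (illustrative). Assumes the TFile in-memory API from
// serialis.h, OpenWrite(GenericVector<char>*) to capture output and
// Open(data, size) to read it back; if those signatures differ, adapt.
static bool RoundTripExample(const WeightMatrix& src, WeightMatrix* dest) {
  GenericVector<char> buffer;
  TFile out;
  out.OpenWrite(&buffer);
  if (!src.Serialize(/*training=*/false, &out)) return false;
  TFile in;
  if (!in.Open(&buffer[0], buffer.size())) return false;
  return dest->DeSerialize(/*training=*/false, &in);
}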
// As DeSerialize, but reads an old (float) format WeightMatrix for
// backward compatibility.
bool WeightMatrix::DeSerializeOld(bool training, TFile* fp) {
  GENERIC_2D_ARRAY<float> float_array;
  if (int_mode_) {
    if (!wi_.DeSerialize(fp)) return false;
    GenericVector<float> old_scales;
    if (!old_scales.DeSerialize(fp)) return false;
    scales_.init_to_size(old_scales.size(), 0.0);
    for (int i = 0; i < old_scales.size(); ++i) scales_[i] = old_scales[i];
  } else if (float_array.DeSerialize(fp)) {
    FloatToDouble(float_array, &wf_);
  } else {
    return false;
  }
  if (training && float_array.DeSerialize(fp))
    FloatToDouble(float_array, &updates_);
  return true;
}
// Computes matrix.vector v = Wu.
// u is of size W.dim2() - 1 and the output v is of size W.dim1(). u is
// imagined to have an extra trailing element of value 1, implementing the
// bias, but it doesn't actually have it.
void WeightMatrix::MatrixDotVector(const double* u, double* v) const {
  ASSERT_HOST(!int_mode_);
  MatrixDotVectorInternal(wf_, true, false, u, v);
}
// As above, but the int8 version: dot the quantized weights with u, then
// scale back to the original range and add the bias.
void WeightMatrix::MatrixDotVector(const inT8* u, double* v) const {
  ASSERT_HOST(int_mode_);
  int num_out = wi_.dim1();
  int num_in = wi_.dim2() - 1;
  for (int i = 0; i < num_out; ++i) {
    const inT8* Wi = wi_[i];
    int total = 0;
    if (SIMDDetect::IsSSEAvailable()) {
      total = IntDotProductSSE(u, Wi, num_in);
    } else {
      for (int j = 0; j < num_in; ++j) total += Wi[j] * u[j];
    }
    // Add in the bias and undo the integer scaling.
    v[i] = (static_cast<double>(total) / MAX_INT8 + Wi[num_in]) * scales_[i];
  }
}
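// Numeric sketch of the dequantization (illustrative values): if a row was
// quantized with scale s = 0.003937, the raw int dot product is total = 5080,
// and the stored int bias is Wi[num_in] = 10, then
//   v[i] = (5080 / 127.0 + 10) * 0.003937 ≈ (40.0 + 10) * 0.003937 ≈ 0.197.
// The bias is scaled by s only, not divided by MAX_INT8, because the
// imaginary extra input element is exactly 1.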
// MatrixDotVector for peep weights: adds the component-wise products of
// *this[0] and v to inout.
void WeightMatrix::MultiplyAccumulate(const double* v, double* inout) {
  ASSERT_HOST(!int_mode_);
  ASSERT_HOST(wf_.dim1() == 1);
  int n = wf_.dim2();
  const double* u = wf_[0];
  for (int i = 0; i < n; ++i) {
    inout[i] += u[i] * v[i];
  }
}
// Computes vector.matrix v = uW, using the pre-transposed copy of W so the
// inner loop runs along contiguous rows. u is of size W.dim1() and the
// output v is of size W.dim2() - 1; the last result is discarded, as v is
// assumed to have an imaginary trailing 1, as with MatrixDotVector.
void WeightMatrix::VectorDotMatrix(const double* u, double* v) const {
  ASSERT_HOST(!int_mode_);
  MatrixDotVectorInternal(wf_t_, false, true, u, v);
}
// Computes the weight_matrix delta for the given outer products:
// dw_[i][j] = sum over samples of u[i][k] * v[j][k], where u is the gradient
// on the output and v is the input. Both u and v are transposed arrays.
void WeightMatrix::SumOuterTransposed(const TransposedArray& u,
                                      const TransposedArray& v,
                                      bool in_parallel) {
  int num_outputs = dw_.dim1();
  int num_inputs = dw_.dim2() - 1;
  int num_samples = u.dim2();
#ifdef _OPENMP
#pragma omp parallel for num_threads(4) if (in_parallel)
#endif
  for (int i = 0; i < num_outputs; ++i) {
    double* dwi = dw_[i];
    const double* ui = u[i];
    for (int j = 0; j < num_inputs; ++j) {
      dwi[j] = DotProduct(ui, v[j], num_samples);
    }
    // The last element of v is missing, presumed 1.0: the bias delta is
    // just the sum of the output gradients over the samples.
    double total = 0.0;
    for (int k = 0; k < num_samples; ++k) total += ui[k];
    dwi[num_inputs] = total;
  }
}
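// Shape sketch (illustrative): with num_outputs = 2, num_inputs = 3 and
// num_samples = 2, u is 2x2 (outputs x samples), v is 3x2 (inputs x samples),
// and each delta entry is a length-2 dot product:
//   dw_[i][j] = u[i][0] * v[j][0] + u[i][1] * v[j][1]
// while the bias column dw_[i][3] gets u[i][0] + u[i][1].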
// Updates the weights using the given learning rate and momentum.
// num_samples is the quotient used in the AdaGrad computation iff
// use_ada_grad_ is true.
void WeightMatrix::Update(double learning_rate, double momentum,
                          int num_samples) {
  ASSERT_HOST(!int_mode_);
  if (use_ada_grad_ && num_samples > 0) {
    dw_sq_sum_.SumSquares(dw_);
    dw_.AdaGradScaling(dw_sq_sum_, num_samples);
  }
  dw_ *= learning_rate;
  updates_ += dw_;
  if (momentum > 0.0) wf_ += updates_;
  if (momentum >= 0.0) updates_ *= momentum;
  wf_t_.Transpose(wf_);  // Keep the transposed copy in sync.
}
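// For momentum > 0 the sequence above is classical momentum: entering step t,
// updates_ holds momentum * u_(t-1), so
//   u_t = momentum * u_(t-1) + learning_rate * dw_t,   then   wf_ += u_t.
// Example with learning_rate = 0.1, momentum = 0.5 and a constant gradient g:
// successive weight steps are 0.1g, 0.15g, 0.175g, ..., converging to
// learning_rate * g / (1 - momentum) = 0.2g per step.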
// Sums the products of weight updates in *this and other, accumulating
// same-direction products into *same and opposing products into *changed.
void WeightMatrix::CountAlternators(const WeightMatrix& other, double* same,
                                    double* changed) const {
  int num_outputs = updates_.dim1();
  int num_inputs = updates_.dim2();
  for (int i = 0; i < num_outputs; ++i) {
    const double* this_i = updates_[i];
    const double* other_i = other.updates_[i];
    for (int j = 0; j < num_inputs; ++j) {
      double product = this_i[j] * other_i[j];
      if (product < 0.0)
        *changed -= product;  // Accumulate the magnitude of disagreement.
      else
        *same += product;
    }
  }
}
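// Numeric example (illustrative): if this matrix's update for a weight is
// +0.2 and other's is -0.1, the product is -0.02 and *changed grows by 0.02;
// if both are +0.2 and +0.1, *same grows by 0.02. Comparing the two totals
// shows whether two training replicas agree on update directions.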
const int kHistogramBuckets = 16;

// Helper computes a histogram bucket for a weight: -log2 of its magnitude,
// clipped to the histogram range, so bucket 0 holds the largest weights.
static void HistogramWeight(double weight, STATS* histogram) {
  int bucket = kHistogramBuckets - 1;
  if (weight != 0.0) {
    double logval = -log2(fabs(weight));
    bucket = ClipToRange(IntCastRounded(logval), 0, kHistogramBuckets - 1);
  }
  histogram->add(bucket, 1);
}
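// Bucket examples (by the formula above, with kHistogramBuckets == 16):
//   weight 0.5  -> -log2(0.5)  = 1    -> bucket 1
//   weight 0.25 -> -log2(0.25) = 2    -> bucket 2
//   weight 1e-6 -> -log2(1e-6) ≈ 19.9 -> clipped to bucket 15
//   weight 0.0  -> bucket 15 (the default for zero weights)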
// Prints a histogram of the (dequantized) weight magnitudes under msg.
void WeightMatrix::Debug2D(const char* msg) {
  STATS histogram(0, kHistogramBuckets);
  if (int_mode_) {
    for (int i = 0; i < wi_.dim1(); ++i) {
      for (int j = 0; j < wi_.dim2(); ++j) {
        HistogramWeight(wi_[i][j] * scales_[i], &histogram);
      }
    }
  } else {
    for (int i = 0; i < wf_.dim1(); ++i) {
      for (int j = 0; j < wf_.dim2(); ++j) {
        HistogramWeight(wf_[i][j], &histogram);
      }
    }
  }
  tprintf("%s\n", msg);
  histogram.print();
}
// Computes and returns the dot product of the two n-vectors u and v,
// dispatching to the fastest available SIMD implementation.
/* static */
double WeightMatrix::DotProduct(const double* u, const double* v, int n) {
  if (SIMDDetect::IsAVXAvailable()) return DotProductAVX(u, v, n);
  if (SIMDDetect::IsSSEAvailable()) return DotProductSSE(u, v, n);
  double total = 0.0;
  for (int k = 0; k < n; ++k) total += u[k] * v[k];
  return total;
}
// Utility function converts an array of float to the corresponding array
// of double.
/* static */
void WeightMatrix::FloatToDouble(const GENERIC_2D_ARRAY<float>& wf,
                                 GENERIC_2D_ARRAY<double>* wd) {
  int dim1 = wf.dim1();
  int dim2 = wf.dim2();
  wd->ResizeNoInit(dim1, dim2);
  for (int i = 0; i < dim1; ++i) {
    const float* wfi = wf[i];
    double* wdi = (*wd)[i];
    for (int j = 0; j < dim2; ++j) wdi[j] = static_cast<double>(wfi[j]);
  }
}
// Computes matrix.vector v = Wu.
// u is of size W.dim2() - add_bias_fwd and v is of size
// W.dim1() - skip_bias_back.
// If add_bias_fwd, u is imagined to have an extra trailing element of value
// 1 to implement the bias weight.
// If skip_bias_back, we are performing the backwards product on a transposed
// matrix, so the v output corresponding to the last element in dim1 is
// dropped.
static void MatrixDotVectorInternal(const GENERIC_2D_ARRAY<double>& w,
                                    bool add_bias_fwd, bool skip_bias_back,
                                    const double* u, double* v) {
  int num_results = w.dim1() - skip_bias_back;
  int extent = w.dim2() - add_bias_fwd;
  for (int i = 0; i < num_results; ++i) {
    const double* wi = w[i];
    double total = WeightMatrix::DotProduct(wi, u, extent);
    if (add_bias_fwd) total += wi[extent];  // The bias weight.
    v[i] = total;
  }
}
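// Bias-handling sketch (illustrative dimensions): in the forward direction
// (add_bias_fwd == true) a 4x3 W maps a 2-vector u to a 4-vector v, with
// wi[2] acting as the bias for an implicit u[2] == 1. In the backward
// direction (skip_bias_back == true) the transposed W is 3x4; the last of
// the 3 rows corresponds to that bias input, so only 2 results are emitted.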