From: Konstantinos Poulios
Subject: [Getfem-commits] [getfem-commits] branch master updated: Whitespace fixes
Date: Tue, 17 Dec 2024 03:39:14 -0500
This is an automated email from the git hooks/post-receive script.
logari81 pushed a commit to branch master
in repository getfem.
The following commit(s) were added to refs/heads/master by this push:
new 8f3578b2 Whitespace fixes
8f3578b2 is described below
commit 8f3578b2e73278f31415447bb7c02006665aaa5a
Author: Konstantinos Poulios <logari81@gmail.com>
AuthorDate: Tue Dec 17 09:38:59 2024 +0100
Whitespace fixes
---
src/bgeot_sparse_tensors.cc | 682 ++++++++++++++++++++------------------
src/getfem/bgeot_sparse_tensors.h | 412 +++++++++++------------
src/getfem_mat_elem.cc | 21 +-
src/gmm/gmm_blas.h | 7 +-
src/gmm/gmm_lapack_interface.h | 25 +-
5 files changed, 589 insertions(+), 558 deletions(-)
diff --git a/src/bgeot_sparse_tensors.cc b/src/bgeot_sparse_tensors.cc
index cbc89f2c..7d08de4a 100644
--- a/src/bgeot_sparse_tensors.cc
+++ b/src/bgeot_sparse_tensors.cc
@@ -28,7 +28,7 @@ namespace bgeot {
std::ostream& operator<<(std::ostream& o, const tensor_ranges& r) {
for (size_type i=0; i < r.size(); ++i) {
if (i) o << "x";
- o << "[0.." << r[i] << "]";
+ o << "[0.." << r[i] << "]";
}
return o;
}
@@ -58,9 +58,11 @@ namespace bgeot {
std::vector<int> n;
public:
const tensor_ranges& getcnt() const { return cnt; }
- basic_multi_iterator() {
- N = 0; idxnums.reserve(16); strides.reserve(64);
- slst.reserve(16);
+ basic_multi_iterator() {
+ N = 0;
+ idxnums.reserve(16);
+ strides.reserve(64);
+ slst.reserve(16);
ilst2idxnums.reserve(64); iter.reserve(4);
}
const tensor_ranges& all_ranges() const { return ranges; }
@@ -71,15 +73,15 @@ namespace bgeot {
assert(idxs.size() == r.size()); assert(s.size() == r.size()+1);
slst.push_back(&s);
for (unsigned int i=0; i < idxs.size(); ++i) {
- index_set::const_iterator f=std::find(idxnums.begin(), idxnums.end(), idxs[i]);
- if (f == idxnums.end()) {
- ilst2idxnums.push_back(dim_type(idxnums.size()));
- idxnums.push_back(idxs[i]);
- ranges.push_back(r[i]);
- } else {
- ilst2idxnums.push_back(dim_type(f-idxnums.begin()));
- assert(ranges[ilst2idxnums.back()] == r[i]);
- }
+ index_set::const_iterator f=std::find(idxnums.begin(), idxnums.end(), idxs[i]);
+ if (f == idxnums.end()) {
+ ilst2idxnums.push_back(dim_type(idxnums.size()));
+ idxnums.push_back(idxs[i]);
+ ranges.push_back(r[i]);
+ } else {
+ ilst2idxnums.push_back(dim_type(f-idxnums.begin()));
+ assert(ranges[ilst2idxnums.back()] == r[i]);
+ }
}
iter.push_back(it_);
N++;
@@ -88,38 +90,37 @@ namespace bgeot {
unsigned c = 0;
strides.assign(N*idxnums.size(),0);
for (unsigned int i=0; i < N; ++i) {
- for (unsigned int j=0; j < slst[i]->size()-1; ++j) {
- stride_type s = (*slst[i])[j];
- strides[ilst2idxnums[c]*N + i] = s;
- c++;
- }
+ for (unsigned int j=0; j < slst[i]->size()-1; ++j) {
+ stride_type s = (*slst[i])[j];
+ strides[ilst2idxnums[c]*N + i] = s;
+ c++;
+ }
}
n.assign(N+1,-1);
for (unsigned int i=0; i < idxnums.size(); ++i) {
- for (unsigned int j=0; j < N; ++j) {
- if (strides[i*N+j]) n[j+1] = i;
- }
+ for (unsigned int j=0; j < N; ++j)
+ if (strides[i*N+j])
+ n[j+1] = i;
}
cnt.assign(idxnums.size(),0);
}
/* unfortunately these two functions don't inline
- and are not optimized by the compiler
+ and are not optimized by the compiler
(when b and N are known at compile-time) (at least for gcc-3.2) */
bool next(unsigned int b) { return next(b,N); }
bool next(unsigned int b, unsigned int NN) {
int i0 = n[b+1];
while (i0 > n[b]) {
- if (++cnt[i0] < ranges[i0]) {
- for (unsigned int i=b; i < NN; ++i) {
- iter[i] += strides[i0*NN+i];
- }
- return true;
- } else {
- for (unsigned int i=b; i < NN; ++i) {
- iter[i] -= strides[i0*NN+i]*(ranges[i0]-1);
- }
- cnt[i0]=0; i0--;
- }
+ if (++cnt[i0] < ranges[i0]) {
+ for (unsigned int i=b; i < NN; ++i)
+ iter[i] += strides[i0*NN+i];
+ return true;
+ } else {
+ for (unsigned int i=b; i < NN; ++i)
+ iter[i] -= strides[i0*NN+i]*(ranges[i0]-1);
+ cnt[i0]=0;
+ i0--;
+ }
}
return false;
}
@@ -128,7 +129,7 @@ namespace bgeot {
};
- /*
+ /*
constructor merging two binary masks
(with a potentially non-empty intersection)
*/
@@ -142,35 +143,37 @@ namespace bgeot {
if (tm2.ndim()==0) { assign(tm1); return; }
r = tm1.ranges();
idxs = tm1.indexes();
-
+
/* this array allows a common loop over the two
masks, the unused dimensions being set to 1 so
that they do not interfere */
tensor_ranges global_r(std::max(tm1.max_dim(),tm2.max_dim())+1, index_type(1));
-
- for (index_type i = 0; i < tm1.indexes().size(); ++i)
+
+ for (index_type i = 0; i < tm1.indexes().size(); ++i)
global_r[tm1.indexes()[i]] = tm1.ranges()[i];
/* detect new indices */
for (index_type i = 0; i < tm2.indexes().size(); ++i) {
index_set::const_iterator f=std::find(tm1.indexes().begin(), tm1.indexes().end(), tm2.indexes()[i]);
if (f == tm1.indexes().end()) {
- assert(global_r[tm2.indexes()[i]]==1);
- global_r[tm2.indexes()[i]] = tm2.ranges()[i];
- r.push_back(tm2.ranges()[i]);
- idxs.push_back(tm2.indexes()[i]);
+ assert(global_r[tm2.indexes()[i]]==1);
+ global_r[tm2.indexes()[i]] = tm2.ranges()[i];
+ r.push_back(tm2.ranges()[i]);
+ idxs.push_back(tm2.indexes()[i]);
}
else assert(global_r[tm2.indexes()[i]] = tm2.ranges()[i]);
}
eval_strides();
assert(size());
m.assign(size(),false);
- /* probably not very optimal, but the function is not critical .. */
+ /* probably not very optimal, but the function is not critical .. */
for (tensor_ranges_loop l(global_r); !l.finished(); l.next()) {
if (and_op) {
- if (tm1(l.cnt) && tm2(l.cnt)) m.add(pos(l.cnt));
+ if (tm1(l.cnt) && tm2(l.cnt))
+ m.add(pos(l.cnt));
} else {
- if (tm1(l.cnt) || tm2(l.cnt)) m.add(pos(l.cnt));
+ if (tm1(l.cnt) || tm2(l.cnt))
+ m.add(pos(l.cnt));
}
}
unset_card();
@@ -187,49 +190,50 @@ namespace bgeot {
/* same masks ? */
if (tm1.indexes() == tm2.indexes() &&
- tm1.ranges() == tm2.ranges() &&
- tm1.strides() == tm2.strides()) {
+ tm1.ranges() == tm2.ranges() &&
+ tm1.strides() == tm2.strides()) {
r = tm1.ranges(); idxs=tm1.indexes(); s=tm1.strides();
- assert(tm1.m.size() == tm2.m.size());
+ assert(tm1.m.size() == tm2.m.size());
m = tm1.m;
if (and_op) {
- for (index_type i=0; i < tm2.m.size(); ++i)
- if (!tm2.m[i]) m[i] = false;
+ for (index_type i=0; i < tm2.m.size(); ++i)
+ if (!tm2.m[i])
+ m[i] = false;
} else {
- for (index_type i=0; i < tm2.m.size(); ++i)
- if (tm2.m[i]) m[i] = true;
+ for (index_type i=0; i < tm2.m.size(); ++i)
+ if (tm2.m[i])
+ m[i] = true;
}
return;
}
basic_multi_iterator<unsigned> bmit;
- bmit.insert(tm1.indexes(), tm1.ranges(), tm1.strides(), 0);
- bmit.insert(tm2.indexes(), tm2.ranges(), tm2.strides(), 0);
+ bmit.insert(tm1.indexes(), tm1.ranges(), tm1.strides(), 0);
+ bmit.insert(tm2.indexes(), tm2.ranges(), tm2.strides(), 0);
r = bmit.all_ranges(); idxs = bmit.all_indexes(); eval_strides();
assert(size());
- m.assign(size(),false);
+ m.assign(size(),false);
bmit.insert(indexes(), ranges(), strides(), 0);
- bmit.prepare();
+ bmit.prepare();
//cout << "tm1=" << tm1 << "\ntm2=" << tm2 << endl;
if (and_op) {
do {
- if (tm1.m[bmit.it(0)]) {
- do {
- if (tm2.m[bmit.it(1)]) {
- m[bmit.it(2)] = 1;
- }
- // cout << "at cnt=" << bmit.getcnt() << ", it0=" << bmit.it(0) << "=" << tm1.m[bmit.it(0)]
- // << ", it1=" << bmit.it(1) << "=" << tm2.m[bmit.it(1)] << ", res=" << bmit.it(2) << "=" << m[bmit.it(2)] << endl;
- } while (bmit.qnext<1,3>());
- }
+ if (tm1.m[bmit.it(0)]) {
+ do {
+ if (tm2.m[bmit.it(1)])
+ m[bmit.it(2)] = 1;
+ // cout << "at cnt=" << bmit.getcnt() << ", it0=" << bmit.it(0) << "=" << tm1.m[bmit.it(0)]
+ // << ", it1=" << bmit.it(1) << "=" << tm2.m[bmit.it(1)] << ", res=" << bmit.it(2) << "=" << m[bmit.it(2)] << endl;
+ } while (bmit.qnext<1,3>());
+ }
} while (bmit.qnext<0,3>());
} else {
do {
- bool v1 = tm1.m[bmit.it(0)];
- do {
- if (v1 || (tm2.m[bmit.it(1)]))
- m[bmit.it(2)] = 1;
- } while (bmit.qnext<1,3>());
+ bool v1 = tm1.m[bmit.it(0)];
+ do {
+ if (v1 || tm2.m[bmit.it(1)])
+ m[bmit.it(2)] = 1;
+ } while (bmit.qnext<1,3>());
} while (bmit.qnext<0,3>());
}
// cout << "output: " << *this << endl;
@@ -238,8 +242,8 @@ namespace bgeot {
/* builds a mask formed by the cartesian product of several
(disjoint) masks
/!\ no check on the validity of the arguments */
- tensor_mask::tensor_mask(const std::vector<const tensor_mask*>& tm) {
- assign(tm);
+ tensor_mask::tensor_mask(const std::vector<const tensor_mask*>& tm) {
+ assign(tm);
}
#if 0
// REPLACED BY THE FOLLOWING FUNCTION
@@ -249,9 +253,9 @@ namespace bgeot {
r.reserve(255); idxs.reserve(255); mask_start.reserve(255);
for (dim_type i=0; i < tm.size(); ++i) {
mask_start.push_back(r.size());
- for (dim_type j=0; j < tm[i]->ndim(); ++j) {
- r.push_back(tm[i]->ranges()[j]);
- idxs.push_back(tm[i]->indexes()[j]);
+ for (dim_type j=0; j < tm[i]->ndim(); ++j) {
+ r.push_back(tm[i]->ranges()[j]);
+ idxs.push_back(tm[i]->indexes()[j]);
}
}
eval_strides(); assert(size());
@@ -259,10 +263,11 @@ namespace bgeot {
for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
bool is_in = true;
for (dim_type i=0; is_in && i < tm.size(); ++i) {
- index_type s_ = 0;
- for (dim_type j=0; j < tm[i]->ndim(); ++j)
- s_ += l.cnt[j+mask_start[i]]*tm[i]->strides()[j];
- if (!tm[i]->m[s_]) is_in = false;
+ index_type s_ = 0;
+ for (dim_type j=0; j < tm[i]->ndim(); ++j)
+ s_ += l.cnt[j+mask_start[i]]*tm[i]->strides()[j];
+ if (!tm[i]->m[s_])
+ is_in = false;
}
if (is_in) m.add(lpos(l.cnt));
}
@@ -281,16 +286,17 @@ namespace bgeot {
assert(size());
m.assign(size(), false);
bmit.insert(indexes(), ranges(), strides(), 0);
- bmit.prepare();
+ bmit.prepare();
unsigned N = unsigned(tm.size());
bool finished = false;
while (!finished) {
bool is_in = true;
unsigned int b;
for (b=0; b < N; ++b) {
- if (!tm[b]->m[bmit.it(b)]) {
- is_in = false; break;
- }
+ if (!tm[b]->m[bmit.it(b)]) {
+ is_in = false;
+ break;
+ }
}
if (is_in) { m[bmit.it(N)] = 1; b = N-1; }
while (!finished && !bmit.next(b)) { if (b == 0) finished = true; b--; }
@@ -298,7 +304,7 @@ namespace bgeot {
}
void tensor_mask::unpack_strides(const tensor_strides& packed,
tensor_strides& unpacked) const {
- if (packed.size() != card())
+ if (packed.size() != card())
assert(packed.size()==card());
unpacked.assign(size(),INT_MIN);
index_type i=0;
@@ -326,18 +332,18 @@ namespace bgeot {
assign_shape(sub);
/* fusionne sub et tr.shape */
merge(tr);
-
+
/* reserve space for the strides */
strides_.resize(masks().size());
for (dim_type i = 0; i < strides_.size(); ++i)
strides_[i].resize(mask(i).card());
-
+
pbase_ = tr.pbase_; base_shift_ = tr.base_shift();
-
- /*
- cout << "\n -> entree dans set_sub_tensor: " << endl
- << "tr.shape=" << (tensor_shape&)(tr) << endl
- << " sub=" << sub << endl;
+
+ /*
+ cout << "\n -> entree dans set_sub_tensor: " << endl
+ << "tr.shape=" << (tensor_shape&)(tr) << endl
+ << " sub=" << sub << endl;
*/
/* quick pre-computation of the strides on tr, for each of its masks */
std::vector<tensor_strides> trstrides_unpacked(tr.masks().size());
@@ -345,56 +351,58 @@ namespace bgeot {
tr.mask(im).check_assertions();
tr.mask(im).unpack_strides(tr.strides()[im], trstrides_unpacked[im]);
}
-
-
+
+
/* we iterate over each mask (merge) */
for (dim_type im = 0; im < masks().size(); ++im) {
const tensor_mask& m = masks()[im];
/* by construction, all the masks of tr are included in (or equal
- to) a mask of the current object
-
- so we build the list of the masks of tr which are included in m
+ to) a mask of the current object
+
+ so we build the list of the masks of tr which are included in m
*/
std::vector<dim_type> trmasks; trmasks.reserve(tr.masks().size());
for (dim_type i=0; i < m.indexes().size(); ++i) {
- if (tr.index_is_valid(m.indexes()[i])) { /* the index is not necessarily valid for tr! */
- dim_type im2 = tr.index_to_mask_num(m.indexes()[i]);
- if (std::find(trmasks.begin(), trmasks.end(), im2)==trmasks.end()) trmasks.push_back(im2);
- }
+ if (tr.index_is_valid(m.indexes()[i])) { /* the index is not necessarily valid for tr! */
+ dim_type im2 = tr.index_to_mask_num(m.indexes()[i]);
+ if (std::find(trmasks.begin(), trmasks.end(), im2)==trmasks.end())
+ trmasks.push_back(im2);
+ }
}
-
+
tensor_ranges gcnt(tr.ndim(),0);
stride_type stcnt = 0;
for (tensor_ranges_loop l(m.ranges()); !l.finished(); l.next()) {
- /* copy the loop indices into the global indices */
- for (dim_type i=0; i < m.ranges().size(); ++i) gcnt[m.indexes()[i]] = l.index(i);
-
- bool in_m = m(gcnt);
- bool in_trm = true;
- stride_type tr_s = 0;
-
- /* check that the element is indeed marked non-null in the masks of tr
- and update the stride */
- for (dim_type i=0; i < trmasks.size(); ++i) {
- const tensor_mask &mm = tr.mask(trmasks[i]);
-
- //cout << " mm=" << mm << endl << "gcnt=" << gcnt << endl;
- if (mm(gcnt)) {
- tr_s += trstrides_unpacked[trmasks[i]][mm.pos(gcnt)];
- assert(trstrides_unpacked[trmasks[i]][mm.pos(gcnt)]>=0); // --- DEBUG ---
- } else { in_trm = false; break; }
- }
- /* check that m is a subset of the masks of trmasks */
- if (in_m) assert(in_trm);
- if (!in_trm) assert(!in_m);
- /* copy the stride if the element is non-null in m */
- if (in_m) {
- // cout << "ajout du " << stcnt << "eme elt @ stride=" << tr_s
<< endl;
- strides_[im][stcnt++] = tr_s;
- }
+ /* copy the loop indices into the global indices */
+ for (dim_type i=0; i < m.ranges().size(); ++i)
+ gcnt[m.indexes()[i]] = l.index(i);
+
+ bool in_m = m(gcnt);
+ bool in_trm = true;
+ stride_type tr_s = 0;
+
+ /* check that the element is indeed marked non-null in the masks of tr
+ and update the stride */
+ for (dim_type i=0; i < trmasks.size(); ++i) {
+ const tensor_mask &mm = tr.mask(trmasks[i]);
+
+ //cout << " mm=" << mm << endl << "gcnt=" << gcnt << endl;
+ if (mm(gcnt)) {
+ tr_s += trstrides_unpacked[trmasks[i]][mm.pos(gcnt)];
+ assert(trstrides_unpacked[trmasks[i]][mm.pos(gcnt)]>=0); // --- DEBUG ---
+ } else { in_trm = false; break; }
+ }
+ /* check that m is a subset of the masks of trmasks */
+ if (in_m) assert(in_trm);
+ if (!in_trm) assert(!in_m);
+ /* copy the stride if the element is non-null in m */
+ if (in_m) {
+ // cout << "ajout du " << stcnt << "eme elt @ stride=" << tr_s <<
endl;
+ strides_[im][stcnt++] = tr_s;
+ }
}
-
+
/* check that there is no bug */
assert(stcnt == stride_type(m.card()));
}
@@ -409,23 +417,24 @@ namespace bgeot {
/* shift the base according to the old stride */
ensure_0_stride();
-
+
/* create a mask m2 with one less dimension than m1 */
const tensor_mask& m1(index_to_mask(slice.dim));
dim_type mdim = index_to_mask_dim(slice.dim);
- if (m1.ndim() > 1) {
+ if (m1.ndim() > 1) {
tensor_ranges r(m1.ranges()); r.erase(r.begin()+mdim);
index_set idx(m1.indexes()); idx.erase(idx.begin()+mdim);
tensor_mask m2(r,idx);
index_type pos1 = 0, pos2 = 0;
for (tensor_ranges_loop l(m1.ranges()); !l.finished(); l.next()) {
- if (l.index(mdim) == slice.i0) {
- m2.set_mask_val(pos2++, m1(pos1));
- } else assert(m1(pos1) == 0);
- pos1++;
+ if (l.index(mdim) == slice.i0)
+ m2.set_mask_val(pos2++, m1(pos1));
+ else
+ assert(m1(pos1) == 0);
+ pos1++;
}
-
-
+
+
/* replace the old mask by the new one */
assert(index_to_mask_num(slice.dim) < masks().size());
masks()[index_to_mask_num(slice.dim)] = m2;
@@ -439,7 +448,7 @@ namespace bgeot {
update_idx2mask();
}
-
+
struct compare_packed_range {
const std::vector<packed_range_info>& pri;
std::vector<stride_type> mean_inc;
@@ -448,9 +457,10 @@ namespace bgeot {
if (pri[a].n < pri[b].n) return true;
else if (pri[a].n > pri[b].n) return false;
else { /* this is where it gets interesting */
- if (pri[a].mean_increm > pri[b].mean_increm) return true;
+ if (pri[a].mean_increm > pri[b].mean_increm)
+ return true;
}
- return false;
+ return false;
}
};
@@ -459,13 +469,13 @@ namespace bgeot {
index_type N_packed_idx = 0;
/* non null elements across all tensors */
-
+
tensor_shape ts(trtab[0]);
for (dim_type i=1; i < trtab.size(); ++i)
ts.merge(trtab[i]);
-
-
- /* apply the mask to all tensors,
+
+
+ /* apply the mask to all tensors,
updating strides according to it */
for (dim_type i = 0; i < N; ++i) {
tensor_ref tmp = trtab[i];
@@ -476,16 +486,16 @@ namespace bgeot {
dal::bit_vector packed_idx; packed_idx.sup(0,ts.ndim());
for (index_type mi=0; mi < ts.masks().size(); ++mi) {
if (ts.masks()[mi].card() != 1) {
- packed_idx.add(mi);
- N_packed_idx++;
+ packed_idx.add(mi);
+ N_packed_idx++;
} else {
- /* sanity check (TODO: make sure that sub_tensor_ref indeed calls ensure_0_stride) */
- for (dim_type j=0; j < N; ++j) {
- if (trtab[j].strides()[mi].size() != 0) {
- assert(trtab[j].strides()[mi].size() == 1);
- assert(trtab[j].strides()[mi][0] == 0);
- }
- }
+ /* sanity check (TODO: make sure that sub_tensor_ref indeed calls ensure_0_stride) */
+ for (dim_type j=0; j < N; ++j) {
+ if (trtab[j].strides()[mi].size() != 0) {
+ assert(trtab[j].strides()[mi].size() == 1);
+ assert(trtab[j].strides()[mi][0] == 0);
+ }
+ }
}
}
@@ -497,13 +507,15 @@ namespace bgeot {
index_type pmi = 0;
for (dim_type mi = 0; mi < ts.masks().size(); ++mi) {
if (packed_idx[mi]) {
- dim_type n;
- pri[pmi].original_masknum = mi;
- pri[pmi].range = ts.masks()[mi].card();
- for (n = 0; n < N; n++)
- if (trtab[n].index_is_valid(mi)) break;
- pri[pmi].n = n; pr[pmi].n = n;
- pmi++;
+ dim_type n;
+ pri[pmi].original_masknum = mi;
+ pri[pmi].range = ts.masks()[mi].card();
+ for (n = 0; n < N; n++)
+ if (trtab[n].index_is_valid(mi))
+ break;
+ pri[pmi].n = n;
+ pr[pmi].n = n;
+ pmi++;
}
}
@@ -511,7 +523,7 @@ namespace bgeot {
std::sort(pri.begin(), pri.end());
- /* eval bloc ranks */
+ /* eval bloc ranks */
bloc_rank.resize(N+1); bloc_rank[0] = 0;
bloc_nelt.resize(N+1); bloc_nelt[0] = 0;
for (index_type i=1; i <= N; ++i) {
@@ -529,19 +541,20 @@ namespace bgeot {
pri[pmi].have_regular_strides = std::bitset<32>((1 << N)-1);
for (dim_type n=pri[pmi].n; n < N; ++n) {
index_type pos0 = (n - pri[pmi].n);
- for (index_type i = 0; i < pri[pmi].range; ++i) {
- index_type pos = i * (N-pri[pmi].n) + pos0;
- if (i != pri[pmi].range-1) {
- stride_type increm = trtab[n].strides()[mi][i+1] - trtab[n].strides()[mi][i];
- pri[pmi].inc[pos] = increm;
- if (pri[pmi].inc[pos] != pri[pmi].inc[pos0])
+ for (index_type i = 0; i < pri[pmi].range; ++i) {
+ index_type pos = i * (N-pri[pmi].n) + pos0;
+ if (i != pri[pmi].range-1) {
+ stride_type increm = trtab[n].strides()[mi][i+1] - trtab[n].strides()[mi][i];
+ pri[pmi].inc[pos] = increm;
+ if (pri[pmi].inc[pos] != pri[pmi].inc[pos0])
pri[pmi].have_regular_strides[n] = false;
- pri[pmi].mean_increm += increm;
- } else { pri[pmi].inc[pos] = -trtab[n].strides()[mi][i]; }
- }
+ pri[pmi].mean_increm += increm;
+ } else
+ pri[pmi].inc[pos] = -trtab[n].strides()[mi][i];
+ }
}
if (pri[pmi].n!=N)
- pri[pmi].mean_increm /= ((N-pri[pmi].n)*(pri[pmi].range-1));
+ pri[pmi].mean_increm /= (N-pri[pmi].n)*(pri[pmi].range-1);
}
/* optimize them w/r to mean strides (without modifying the "ranks") */
@@ -554,8 +567,8 @@ namespace bgeot {
std::vector<packed_range> tmppr(pr);
std::vector<packed_range_info> tmppri(pri);
for (dim_type i =0; i < pri.size(); ++i) {
- pr[i] = tmppr [pr_reorder[i]];
- pri[i] = tmppri[pr_reorder[i]];
+ pr[i] = tmppr [pr_reorder[i]];
+ pri[i] = tmppri[pr_reorder[i]];
}
}
@@ -564,31 +577,35 @@ namespace bgeot {
idxval.resize(ts.ndim());
for (dim_type i=0; i < ts.ndim(); ++i) {
- idxval[i].ppinc = NULL; idxval[i].pposbase = NULL; idxval[i].pos_ = 0;
+ idxval[i].ppinc = NULL;
+ idxval[i].pposbase = NULL;
+ idxval[i].pos_ = 0;
}
for (index_type mi = 0; mi < ts.masks().size(); ++mi) {
- tensor_strides v;
- pmi = index_type(-1);
- for (dim_type i=0; i < pr.size(); ++i)
- if (pri[i].original_masknum == mi) pmi = i;
-
- if (pmi != index_type(-1)) {
- ts.masks()[mi].gen_mask_pos(pri[pmi].mask_pos);
- } else { /* not very nice .. */
- ts.masks()[mi].gen_mask_pos(v); assert(v.size()==1);
- }
- for (dim_type i=0; i < ts.masks()[mi].indexes().size(); ++i) {
- dim_type ii = ts.masks()[mi].indexes()[i];
- idxval[ii].cnt_num = dim_type(pmi); //packed_idx[mi] ? pmi : dim_type(-1);
- idxval[ii].pos_ = (pmi != index_type(-1)) ? 0 : v[0];
- idxval[ii].mod = ts.masks()[mi].strides()[i+1];
- idxval[ii].div = ts.masks()[mi].strides()[i];
- }
+ tensor_strides v;
+ pmi = index_type(-1);
+ for (dim_type i=0; i < pr.size(); ++i)
+ if (pri[i].original_masknum == mi)
+ pmi = i;
+
+ if (pmi != index_type(-1)) {
+ ts.masks()[mi].gen_mask_pos(pri[pmi].mask_pos);
+ } else { /* not very nice .. */
+ ts.masks()[mi].gen_mask_pos(v);
+ assert(v.size()==1);
+ }
+ for (dim_type i=0; i < ts.masks()[mi].indexes().size(); ++i) {
+ dim_type ii = ts.masks()[mi].indexes()[i];
+ idxval[ii].cnt_num = dim_type(pmi); //packed_idx[mi] ? pmi : dim_type(-1);
+ idxval[ii].pos_ = (pmi != index_type(-1)) ? 0 : v[0];
+ idxval[ii].mod = ts.masks()[mi].strides()[i+1];
+ idxval[ii].div = ts.masks()[mi].strides()[i];
+ }
}
}
- /* check for opportunities to vectorize the loops with the mti
+ /* check for opportunities to vectorize the loops with the mti
(assuming regular strides etc.)
*/
vectorized_strides_.resize(N); vectorized_size_ = 0;
@@ -599,8 +616,8 @@ namespace bgeot {
if (vectorized_pr_dim == pri.size()-1) {
if (pp->have_regular_strides.count() == N) vectorized_size_ = pp->range;
for (dim_type n=pp->n; n < N; ++n) {
- GMM_ASSERT3(n < pp->inc.size(), ""); // expected failure here with "empty" tensors, see tests/test_assembly.cc, testbug() function.
- vectorized_strides_[n] = pp->inc[n];
+ GMM_ASSERT3(n < pp->inc.size(), ""); // expected failure here with "empty" tensors, see tests/test_assembly.cc, testbug() function.
+ vectorized_strides_[n] = pp->inc[n];
}
} else {
if (pp->have_regular_strides.count() != N) break;
@@ -623,18 +640,18 @@ namespace bgeot {
}
void multi_tensor_iterator::print() const {
- cout << "MTI(N=" << N << "): ";
- for (dim_type i=0; i < pr.size(); ++i)
- cout << " pri[" << int(i) << "]: n=" << int(pri[i].n)
- << ", range=" << pri[i].range << ", mean_increm="
- << pri[i].mean_increm << ", regular = " << pri[i].have_regular_strides
+ cout << "MTI(N=" << N << "): ";
+ for (dim_type i=0; i < pr.size(); ++i)
+ cout << " pri[" << int(i) << "]: n=" << int(pri[i].n)
+ << ", range=" << pri[i].range << ", mean_increm="
+ << pri[i].mean_increm << ", regular = " << pri[i].have_regular_strides
<< ", inc=" << vref(pri[i].inc) << "\n";
- cout << "bloc_rank: " << vref(bloc_rank) << ", bloc_nelt: " <<
vref(bloc_nelt) << "\n";
+ cout << "bloc_rank: " << vref(bloc_rank) << ", bloc_nelt: " <<
vref(bloc_nelt) << "\n";
cout << "vectorized_size : " << vectorized_size_ << ", strides = " <<
vref(vectorized_strides_) << ", pr_dim=" << vectorized_pr_dim << "\n";
}
void tensor_reduction::insert(const tensor_ref& tr_, const std::string& s) {
- tensor_shape ts(tr_);
+ tensor_shape ts(tr_);
diag_shape(ts,s);
trtab.push_back(tref_or_reduction(tensor_ref(tr_, ts), s));
//cout << "reduction.insert('" << s << "')\n";
@@ -642,18 +659,18 @@ namespace bgeot {
//cout << "Content: " << tr_ << endl;
//cout << "shape: " << (tensor_shape&)tr_ << endl;
}
-
+
void tensor_reduction::insert(const tref_or_reduction& t, const std::string& s) {
if (!t.is_reduction()) {
insert(t.tr(),s);
} else {
trtab.push_back(t); trtab.back().ridx = s;
//cout << "reduction.insert_reduction('" << s << "')\n";
- //cout << "Reduction: INSERT REDUCTION tr(ndim=" << int(t.tr().ndim())
<< ", s='" << s << "')\n";
+ //cout << "Reduction: INSERT REDUCTION tr(ndim=" << int(t.tr().ndim())
<< ", s='" << s << "')\n";
}
}
- /* ensure that r(i,i).t(j,:,j,j,k,o)
+ /* ensure that r(i,i).t(j,:,j,j,k,o)
becomes r(i,A).t(j,:,B,C,k,o)
and updates reduction_chars accordingly
*/
@@ -662,19 +679,19 @@ namespace bgeot {
for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
assert(it->ridx.size() == it->tr().ndim());
for (unsigned i =0; i < it->ridx.size(); ++i) {
- if (it->ridx[i] != ' ' &&
- reduction_chars.find(it->ridx[i]) == std::string::npos)
- reduction_chars.push_back(it->ridx[i]);
+ if (it->ridx[i] != ' ' &&
+ reduction_chars.find(it->ridx[i]) == std::string::npos)
+ reduction_chars.push_back(it->ridx[i]);
}
}
/* for each tensor, if a diagonal reduction inside the tensor is used,
the mask of the tensor has been 'and'-ed with a diagonal mask
- and a second 'virtual' reduction index is used */
+ and a second 'virtual' reduction index is used */
for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
it->gdim.resize(it->ridx.size());
for (unsigned i =0; i < it->ridx.size(); ++i) {
char c = it->ridx[i];
- if (c != ' ' && it->ridx.find(c) != i) {
+ if (c != ' ' && it->ridx.find(c) != i) {
for (c = 'A'; c <= 'Z'; ++c)
if (reduction_chars.find(c) == std::string::npos) break;
it->ridx[i] = c;
@@ -684,8 +701,8 @@ namespace bgeot {
}
}
- /*
- initialize 'reduced_range' and it->rdim
+ /*
+ initialize 'reduced_range' and it->rdim
*/
void tensor_reduction::pre_prepare() {
for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
@@ -693,10 +710,11 @@ namespace bgeot {
it->rdim.resize(it->ridx.size());
//cout << " rix = '" << it->ridx << "'\n";
for (dim_type i =0; i < it->ridx.size(); ++i) {
- if (it->ridx[i] == ' ') {
- reduced_range.push_back(it->tr().dim(i));
- it->rdim[i] = dim_type(reduced_range.size()-1);
- } else it->rdim[i] = dim_type(-1);
+ if (it->ridx[i] == ' ') {
+ reduced_range.push_back(it->tr().dim(i));
+ it->rdim[i] = dim_type(reduced_range.size()-1);
+ } else
+ it->rdim[i] = dim_type(-1);
}
}
}
@@ -712,14 +730,14 @@ namespace bgeot {
//cout << "find_best_reduction: reduction_chars='" << reduction_chars <<
"'\n";
GMM_ASSERT2(trtab.size() <= 32, "wow it was assumed that nobody would "
- "ever need a reduction on more than 32 tensors..");
+ "ever need a reduction on more than 32
tensors..");
std::vector<std::bitset<32> > idx_occurences(reduction_chars.size());
for (unsigned ir=0; ir < reduction_chars.size(); ++ir) {
char c = reduction_chars[ir];
for (unsigned tnum=0; tnum < trtab.size(); ++tnum)
- idx_occurences[ir][tnum] = (trtab[tnum].ridx.find(c) != std::string::npos);
+ idx_occurences[ir][tnum] = (trtab[tnum].ridx.find(c) != std::string::npos);
//cout << "find_best_reduction: idx_occurences[" << ir << "] = " <<
idx_occurences[ir] << "\n";
}
size_type best_redsz = 100000000;
@@ -728,41 +746,43 @@ namespace bgeot {
idxset.resize(0); idxset.push_back(reduction_chars[ir]);
/* add other possible reductions */
for (unsigned ir2=0; ir2 < reduction_chars.size(); ++ir2) {
- if (ir2 != ir) {
- if ((idx_occurences[ir2] | idx_occurences[ir]) == idx_occurences[ir]) {
- lst.add(ir2);
- idxset.push_back(reduction_chars[ir2]);
- }
- }
+ if (ir2 != ir) {
+ if ((idx_occurences[ir2] | idx_occurences[ir]) == idx_occurences[ir]) {
+ lst.add(ir2);
+ idxset.push_back(reduction_chars[ir2]);
+ }
+ }
}
/* evaluate the cost */
size_type redsz = 1;
for (unsigned tnum=0; tnum < trtab.size(); ++tnum) {
- if (!idx_occurences[ir][tnum]) continue;
+ if (!idx_occurences[ir][tnum])
+ continue;
std::bitset<int(32)> once((int)reduction_chars.size());
- for (dim_type i=0; i < trtab[tnum].tr().ndim(); ++i) {
- bool ignore = false;
- for (dal::bv_visitor j(lst); !j.finished(); ++j) {
- if (trtab[tnum].ridx[i] == reduction_chars[(size_t)j]) {
- if (once[j]) ignore = true; else once[j] = true;
- }
+ for (dim_type i=0; i < trtab[tnum].tr().ndim(); ++i) {
+ bool ignore = false;
+ for (dal::bv_visitor j(lst); !j.finished(); ++j) {
+ if (trtab[tnum].ridx[i] == reduction_chars[(size_t)j]) {
+ if (once[j]) ignore = true;
+ else once[j] = true;
+ }
}
- if (!ignore)
- redsz *= trtab[tnum].tr().dim(i);
- }
+ if (!ignore)
+ redsz *= trtab[tnum].tr().dim(i);
+ }
}
//cout << " test " << reduction_chars[ir] << ": lst=" << lst << ",
redsz=" << redsz << "\n";
if (redsz < best_redsz) {
- best_redsz = redsz;
+ best_redsz = redsz;
best_lst.clear();
for (unsigned i=0; i < trtab.size(); ++i)
if (idx_occurences[ir][i]) best_lst.add(i);
- best_idxset = idxset;
+ best_idxset = idxset;
}
}
- /*cout << "find_best_reduction: lst = " << best_lst << " [nt="
- << trtab.size() << "], idx_set='" << best_idxset
- << "', redsz=" << best_redsz << "\n";
+ /*cout << "find_best_reduction: lst = " << best_lst << " [nt="
+ << trtab.size() << "], idx_set='" << best_idxset
+ << "', redsz=" << best_redsz << "\n";
*/
return best_redsz;
}
@@ -774,48 +794,48 @@ namespace bgeot {
find_best_sub_reduction(bv,red);
if (bv.card() < trtab.size() && red.size()) {
//cout << "making sub reduction\n";
- auto sub = std::make_shared<tensor_reduction>();
+ auto sub = std::make_shared<tensor_reduction>();
std::vector<dim_type> new_rdim; new_rdim.reserve(16);
- std::string new_reduction;
+ std::string new_reduction;
for (dal::bv_visitor tnum(bv); !tnum.finished(); ++tnum) {
- tref_or_reduction &t = trtab[tnum];
+ tref_or_reduction &t = trtab[tnum];
std::string re = t.ridx; t.ridx.clear();
for (unsigned i = 0; i < re.size(); ++i) {
bool reduced = false;
- char c = re[i];
+ char c = re[i];
if (c != ' ') {
if (red.find(re[i]) == std::string::npos) c = ' ';
- else reduced = true;
+ else reduced = true;
}
- if (!reduced) {
+ if (!reduced) {
t.ridx.push_back(re[i]);
- new_rdim.push_back(t.rdim[i]);
- new_reduction.push_back(re[i]);
- }
- re[i] = c;
+ new_rdim.push_back(t.rdim[i]);
+ new_reduction.push_back(re[i]);
+ }
+ re[i] = c;
}
//cout << " sub-";
sub->insert(trtab[tnum], re);
}
- //cout << " new_reduction = '" << new_reduction << "'\n";
+ //cout << " new_reduction = '" << new_reduction << "'\n";
sub->prepare();
- /*cout << " " << new_reduction.size() << " == " <<
int(sub->trres.ndim()) << "?\n";
- assert(new_reduction.size() == sub->trres.ndim());*/
- trtab[bv.first_true()] = tref_or_reduction(sub, new_reduction);
+ /*cout << " " << new_reduction.size() << " == " << int(sub->trres.ndim()) << "?\n";
+ assert(new_reduction.size() == sub->trres.ndim());*/
+ trtab[bv.first_true()] = tref_or_reduction(sub, new_reduction);
trtab[bv.first_true()].rdim = new_rdim;
std::vector<tref_or_reduction> trtab2; trtab2.reserve(trtab.size() - bv.card() + 1);
for (size_type i=0; i < trtab.size(); ++i)
if (!bv.is_in(i) || i == bv.first_true())
trtab2.push_back(trtab[i]);
trtab.swap(trtab2);
- //cout << "make_sub_reductions[" << iter << "] : still " <<
trtab.size() << " tensors\n";
- /*for (size_type i=0; i < trtab.size(); ++i)
- cout << " dim = " << trtab[i].tr().ndim() << " : '" <<
trtab[i].ridx << "'\n";
+ //cout << "make_sub_reductions[" << iter << "] : still " <<
trtab.size() << " tensors\n";
+ /*for (size_type i=0; i < trtab.size(); ++i)
+ cout << " dim = " << trtab[i].tr().ndim() << " : '" <<
trtab[i].ridx << "'\n";
*/
- ++iter;
+ ++iter;
} else {
- //cout << "Ok, no more reductions (bv.card() == " << bv.card() <<
")\n\n";
- break;
+ //cout << "Ok, no more reductions (bv.card() == " << bv.card() <<
")\n\n";
+ break;
}
} while (1);
}
@@ -833,36 +853,36 @@ namespace bgeot {
} else {
GMM_ASSERT3(tr_out->ndim() == reduced_range.size(), "");
for (dim_type i=0; i < tr_out->ndim(); ++i)
- GMM_ASSERT3(tr_out->dim(i) == reduced_range[i], "");
+ GMM_ASSERT3(tr_out->dim(i) == reduced_range[i], "");
trres = *tr_out;
}
- /* prepare the mapping from each dimension of each tensor to the global range
+ /* prepare the mapping from each dimension of each tensor to the global range
(the first dimensions are reserved for non-reduced dimensions, i.e. those
of 'reduced_range'
*/
- tensor_ranges global_range; /* global range across all tensors of the
- reduction */
+ tensor_ranges global_range; /* global range across all tensors of the
+ reduction */
std::string global_chars; /* name of indexes (or ' ') for each dimension
- of global_range */
- global_range.reserve(16);
+ of global_range */
+ global_range.reserve(16);
global_range.assign(reduced_range.begin(), reduced_range.end());
- global_chars.insert(size_type(0), reduced_range.size(), ' ');
+ global_chars.insert(size_type(0), reduced_range.size(), ' ');
for (trtab_iterator it = trtab.begin(); it != trtab.end(); ++it) {
assert(it->rdim.size() == it->tr().ndim());
it->gdim = it->rdim;
for (dim_type i=0; i < it->ridx.size(); ++i) {
if (it->rdim[i] == dim_type(-1)) {
- assert(it->ridx[i] != ' ');
+ assert(it->ridx[i] != ' ');
std::string::size_type p = global_chars.find(it->ridx[i]);
if (p == std::string::npos) {
global_chars.push_back(it->ridx[i]);
global_range.push_back(it->tr().dim(i));
it->gdim[i] = dim_type(global_range.size() - 1);
} else {
- GMM_ASSERT1(it->tr().dim(i) == global_range[p],
- "inconsistent dimensions for reduction index "
- << it->ridx[i] << "(" << int(it->tr().dim(i))
+ GMM_ASSERT1(it->tr().dim(i) == global_range[p],
+ "inconsistent dimensions for reduction index "
+ << it->ridx[i] << "(" << int(it->tr().dim(i))
<< " != " << int(global_range[p]) << ")");
it->gdim[i] = dim_type(p);
}
@@ -871,7 +891,7 @@ namespace bgeot {
//cout << " rdim = " << it->rdim << ", gdim = " << it->gdim << "\n";
}
//cout << "global_range = " << global_range << ", global_chars = '" <<
global_chars << "'\n";
-
+
std::vector<dim_type> reorder(global_chars.size(), dim_type(-1));
/* increase the dimension of the tensor holding the result */
for (dim_type i=0; i < reduced_range.size(); ++i) reorder[i] = i;
@@ -891,16 +911,16 @@ namespace bgeot {
tt.push_back(it->tr());
//cout << "MTI[" << it-trtab.begin() << "/" << trtab.size() << "] : " <<
(tensor_shape&)it->tr();
}
-
+
/* now, the iterator can be built */
mti.init(tt,false);
}
-
+
static void do_reduction1(bgeot::multi_tensor_iterator &mti) {
do {
scalar_type s1 = 0;
- do {
- s1 += mti.p(1);
+ do {
+ s1 += mti.p(1);
} while (mti.bnext(1));
mti.p(0) += s1;
} while (mti.bnext(0));
@@ -912,15 +932,16 @@ namespace bgeot {
if (n && s[0] && s[1] && s[2] == 0) {
BLAS_INT incx = s[1], incy = s[0];
/*mti.print();
- scalar_type *b[3];
+ scalar_type *b[3];
for (int i=0; i < 3; ++i) b[i] = &mti.p(i);*/
do {
- /*cout << "vectorized_ reduction2a : n=" << n << ", s = " << s << "
mti.p=" << &mti.p(0)-b[0] << ","
+ /*cout << "vectorized_ reduction2a : n=" << n << ", s = " << s << "
mti.p=" << &mti.p(0)-b[0] << ","
<< &mti.p(1)-b[1] << "," << &mti.p(2)-b[2] << "\n";*/
- gmm::daxpy_(&n, &mti.p(2), &mti.p(1), &incx, &mti.p(0), &incy);
+ gmm::daxpy_(&n, &mti.p(2), &mti.p(1), &incx, &mti.p(0), &incy);
} while (mti.vnext());
return true;
- } else return false;
+ } else
+ return false;
}
static void do_reduction2a(bgeot::multi_tensor_iterator &mti) {
if (!do_reduction2v(mti)) {
@@ -933,15 +954,15 @@ namespace bgeot {
static void do_reduction2b(bgeot::multi_tensor_iterator &mti) {
do {
scalar_type s1 = 0;
- do {
+ do {
scalar_type s2 = 0;
do {
s2 += mti.p(2);
} while (mti.bnext(2));
- s1 += mti.p(1)*s2;
+ s1 += mti.p(1)*s2;
} while (mti.bnext(1));
mti.p(0) += s1;
- } while (mti.bnext(0));
+ } while (mti.bnext(0));
}
static bool do_reduction3v(bgeot::multi_tensor_iterator &mti) {
@@ -951,10 +972,11 @@ namespace bgeot {
BLAS_INT incx = s[1], incy = s[0];
do {
double v = mti.p(2)*mti.p(3);
- gmm::daxpy_(&n, &v, &mti.p(1), &incx, &mti.p(0), &incy);
+ gmm::daxpy_(&n, &v, &mti.p(1), &incx, &mti.p(0), &incy);
} while (mti.vnext());
return true;
- } else return false;
+ } else
+ return false;
}
static void do_reduction3a(bgeot::multi_tensor_iterator &mti) {
@@ -968,33 +990,33 @@ namespace bgeot {
static void do_reduction3b(bgeot::multi_tensor_iterator &mti) {
do {
scalar_type s1 = 0;
- do {
+ do {
scalar_type s2 = 0;
do {
scalar_type s3 = 0;
- do {
+ do {
s3 += mti.p(3);
} while (mti.bnext(3));
s2 += mti.p(2)*s3;
} while (mti.bnext(2));
- s1 += mti.p(1)*s2;
+ s1 += mti.p(1)*s2;
} while (mti.bnext(1));
mti.p(0) += s1;
- } while (mti.bnext(0));
+ } while (mti.bnext(0));
}
void tensor_reduction::do_reduction() {
/* make sure that the destination tensor has been reset to zero
before the computation (unfortunately this is mandatory, a consequence
- of using masks which do not necessarily stop at the
+ of using masks which do not necessarily stop at the
'boundaries' between the different reduced tensors) */
//std::fill(out_data.begin(), out_data.end(), 0.);
if (out_data.size()) memset(&out_data[0], 0, out_data.size()*sizeof(out_data[0]));
for (unsigned i=0; i < trtab.size(); ++i) {
- if (trtab[i].is_reduction()) {
- trtab[i].reduction->do_reduction();
- trtab[i].reduction->result(trtab[i].tr());
- //cout << "resultat intermediaire: " << trtab[i].tr() << endl;
+ if (trtab[i].is_reduction()) {
+ trtab[i].reduction->do_reduction();
+ trtab[i].reduction->result(trtab[i].tr());
+ //cout << "resultat intermediaire: " << trtab[i].tr() << endl;
}
}
mti.rewind();
@@ -1015,32 +1037,32 @@ namespace bgeot {
}
} else if (N == 4) {
do {
- scalar_type s1 = 0;
- do {
- scalar_type s2 = 0;
- do {
- scalar_type s3 = 0;
- do {
+ scalar_type s1 = 0;
+ do {
+ scalar_type s2 = 0;
+ do {
+ scalar_type s3 = 0;
+ do {
scalar_type s4 = 0;
do {
s4 += mti.p(4);
} while (mti.bnext(4));
s3 += mti.p(3)*s4;
- } while (mti.bnext(3));
- s2 += mti.p(2)*s3;
- } while (mti.bnext(2));
- s1 += mti.p(1)*s2;
- } while (mti.bnext(1));
- mti.p(0) += s1;
- } while (mti.bnext(0));
+ } while (mti.bnext(3));
+ s2 += mti.p(2)*s3;
+ } while (mti.bnext(2));
+ s1 += mti.p(1)*s2;
+ } while (mti.bnext(1));
+ mti.p(0) += s1;
+ } while (mti.bnext(0));
} else {
GMM_ASSERT1(false, "unhandled reduction case ! (N=" << int(N) << ")");
}
}
void tensor_reduction::clear() {
- trtab.clear();
- trres.clear();
+ trtab.clear();
+ trres.clear();
reduced_range.resize(0);
reduction_chars.clear();
@@ -1054,27 +1076,29 @@ namespace bgeot {
index_type c=card(true);
check_assertions();
o << " mask : card=" << c << "(card_=" << card_ << ", uptodate=" <<
card_uptodate << "), indexes=";
- for (dim_type i=0; i < idxs.size(); ++i)
+ for (dim_type i=0; i < idxs.size(); ++i)
o << (i==0?"":", ") << int(idxs[i]) << ":" << int(r[i]);
o << " ";
if (c == size()) o << " FULL" << endl;
else {
o << "m={";
if (idxs.size() == 1) {
- for (index_type i=0; i < m.size(); ++i) o << (m[i] ? 1 : 0);
+ for (index_type i=0; i < m.size(); ++i)
+ o << (m[i] ? 1 : 0);
} else {
- for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
- if (l.cnt[0] == 0 && l.cnt[1] == 0 && r.size()>2) {
- o << "\n -> (:,:";
- for (dim_type i=2; i < r.size(); ++i) o << "," << l.cnt[i];
- o << ")={";
- }
- o << (m[lpos(l.cnt)] ? 1 : 0);
- if (l.cnt[0] == r[0]-1) {
- if (l.cnt[1] != r[1]-1) o << ",";
- else if (idxs.size() > 2) o << "}";
- }
- }
+ for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
+ if (l.cnt[0] == 0 && l.cnt[1] == 0 && r.size()>2) {
+ o << "\n -> (:,:";
+ for (dim_type i=2; i < r.size(); ++i)
+ o << "," << l.cnt[i];
+ o << ")={";
+ }
+ o << (m[lpos(l.cnt)] ? 1 : 0);
+ if (l.cnt[0] == r[0]-1) {
+ if (l.cnt[1] != r[1]-1) o << ",";
+ else if (idxs.size() > 2) o << "}";
+ }
+ }
}
o << "}" << endl;
}
@@ -1087,7 +1111,7 @@ namespace bgeot {
for (dim_type i=0; i < idx2mask.size(); ++i) {
if (i) o << ",";
if (idx2mask[i].is_valid()) {
- o << "r" << dim(i) << ":m" << int(idx2mask[i].mask_num) << "/" <<
int(idx2mask[i].mask_dim);
+ o << "r" << dim(i) << ":m" << int(idx2mask[i].mask_num) << "/" <<
int(idx2mask[i].mask_dim);
} else o << " (na) ";
}
o << endl;
@@ -1106,14 +1130,14 @@ namespace bgeot {
multi_tensor_iterator mti(*this, true);
do {
for (dim_type i = 0; i < mti.ndim(); ++i) {
- o << (i==0?"(":",");
- if (index_is_valid(i))
- o << mti.index(i);
- else o << "*";
+ o << (i==0?"(":",");
+ if (index_is_valid(i))
+ o << mti.index(i);
+ else o << "*";
}
o << ")";
if (base()) {
- o << " = " << mti.p(0) << "\t@base+" << &mti.p(0) - base();
+ o << " = " << mti.p(0) << "\t@base+" << &mti.p(0) - base();
} else o << "\t@" << size_t(&mti.p(0))/sizeof(scalar_type);
o << endl;
} while (mti.qnext1());
@@ -1121,11 +1145,11 @@ namespace bgeot {
o << "^---- end tensor_ref" << endl;
}
- std::ostream& operator<<(std::ostream& o, const tensor_mask& m) {
- m.print(o); return o;
+ std::ostream& operator<<(std::ostream& o, const tensor_mask& m) {
+ m.print(o); return o;
}
- std::ostream& operator<<(std::ostream& o, const tensor_shape& ts) {
- ts.print(o); return o;
+ std::ostream& operator<<(std::ostream& o, const tensor_shape& ts) {
+ ts.print(o); return o;
}
std::ostream& operator<<(std::ostream& o, const tensor_ref& tr) {
tr.print(o); return o;
diff --git a/src/getfem/bgeot_sparse_tensors.h b/src/getfem/bgeot_sparse_tensors.h
index 768f9b92..6b2bbedc 100644
--- a/src/getfem/bgeot_sparse_tensors.h
+++ b/src/getfem/bgeot_sparse_tensors.h
@@ -35,10 +35,10 @@
@brief Sparse tensors, used during the assembly.
"sparse" tensors: these are not handled like sparse matrices
-
+
As an example, let say that we have a tensor t(i,j,k,l) of
- dimensions 4x2x3x3, with t(i,j,k,l!=k) == 0.
-
+ dimensions 4x2x3x3, with t(i,j,k,l!=k) == 0.
+
Then the tensor shape will be represented by a set of 3 objects of type
'tensor_mask':
mask1: {i}, "1111"
@@ -47,13 +47,13 @@
"010"
"001"
They contain a binary tensor indicating the non-null elements.
-
+
The set of these three masks define the shape of the tensor
(class tensor_shape)
If we add information about the location of the non-null elements
(by mean of strides), then we have an object of type 'tensor_ref'
-
+
Iteration on the data of one or more tensor should be done via the
'multi_tensor_iterator', which can iterate over common non-null
elements of a set of tensors.
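As a minimal self-contained sketch of the mask idea described in this
comment (hypothetical names invented for illustration, not the getfem API),
the three masks of the 4x2x3x3 example could be encoded like this:

// Illustration only: invented types, not getfem's tensor_mask.
#include <cassert>
#include <vector>

struct MaskSketch {
  std::vector<unsigned> indexes; // tensor dimensions covered by this mask
  std::vector<unsigned> ranges;  // range of each covered dimension
  std::vector<bool> m;           // flat binary tensor marking non-null entries
};

int main() {
  // the 4x2x3x3 example above: t(i,j,k,l) with t(i,j,k,l!=k) == 0
  MaskSketch mask1{{0}, {4}, std::vector<bool>(4, true)};        // {i}: "1111"
  MaskSketch mask2{{1}, {2}, std::vector<bool>(2, true)};        // {j}: "11"
  MaskSketch mask3{{2, 3}, {3, 3}, std::vector<bool>(9, false)}; // {k,l}
  for (unsigned k = 0; k < 3; ++k)
    mask3.m[3 * k + k] = true; // identity pattern "100/010/001": l == k only
  // only the product of the mask cardinalities needs to be stored:
  unsigned stored = 4 * 2 * 3; // 24 non-null entries instead of 72 dense ones
  assert(stored == 24);
  return 0;
}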
@@ -97,23 +97,23 @@ namespace bgeot {
typedef scalar_type * TDIter;
- std::ostream& operator<<(std::ostream& o, const tensor_ranges& r);
-
+ std::ostream& operator<<(std::ostream& o, const tensor_ranges& r);
+
/* stupid && inefficient loop structure */
struct tensor_ranges_loop {
tensor_ranges sz;
tensor_ranges cnt;
bool finished_;
public:
- tensor_ranges_loop(const tensor_ranges& t) : sz(t), cnt(t.size()), finished_(t.size() == 0) {
- std::fill(cnt.begin(), cnt.end(), 0);
+ tensor_ranges_loop(const tensor_ranges& t) : sz(t), cnt(t.size()), finished_(t.size() == 0) {
+ std::fill(cnt.begin(), cnt.end(), 0);
}
index_type index(dim_type i) { return cnt[i]; }
bool finished() const { return finished_; }
- bool next() {
+ bool next() {
index_type i = 0;
while (++cnt[i] >= sz[i]) {
- cnt[i] = 0; i++; if (i >= sz.size()) { finished_ = true; break; }
+ cnt[i] = 0; i++; if (i >= sz.size()) { finished_ = true; break; }
}
return finished_;
}
@@ -131,15 +131,15 @@ namespace bgeot {
tensor_mask() { set_card(0); }
explicit tensor_mask(const tensor_ranges& r_, const index_set& idxs_) {
assign(r_,idxs_);
- }
+ }
/* merge constructor */
explicit tensor_mask(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op);
- explicit tensor_mask(const std::vector<const tensor_mask*>& tm);
- explicit tensor_mask(const std::vector<const tensor_mask*> tm1,
- const std::vector<const tensor_mask*> tm2, bool and_op);
+ explicit tensor_mask(const std::vector<const tensor_mask*>& tm);
+ explicit tensor_mask(const std::vector<const tensor_mask*> tm1,
+ const std::vector<const tensor_mask*> tm2, bool and_op);
void swap(tensor_mask &tm) {
r.swap(tm.r); idxs.swap(tm.idxs);
- m.swap(tm.m); s.swap(tm.s);
+ m.swap(tm.m); s.swap(tm.s);
std::swap(card_, tm.card_);
std::swap(card_uptodate, tm.card_uptodate);
}
@@ -147,16 +147,16 @@ namespace bgeot {
r = r_; idxs = idxs_; eval_strides(); m.assign(size(),false);
set_card(0);
}
- void assign(const tensor_mask& tm) {
- r = tm.r;
- idxs = tm.idxs;
- m = tm.m;
+ void assign(const tensor_mask& tm) {
+ r = tm.r;
+ idxs = tm.idxs;
+ m = tm.m;
s = tm.s;
card_ = tm.card_; card_uptodate = tm.card_uptodate;
}
void assign(const std::vector<const tensor_mask* >& tm);
void assign(const tensor_mask& tm1, const tensor_mask& tm2, bool and_op);
-
+
void clear() { r.resize(0); idxs.resize(0); m.clear(); s.resize(0); set_card(0); }
const tensor_ranges& ranges() const { return r; }
const index_set& indexes() const { return idxs; }
@@ -165,32 +165,32 @@ namespace bgeot {
void eval_strides() {
s.resize(r.size()+1); s[0]=1;
for (index_type i=0; i < r.size(); ++i) {
- s[i+1]=s[i]*r[i];
+ s[i+1]=s[i]*r[i];
}
}
index_type ndim() const { return index_type(r.size()); }
index_type size() const { return s[r.size()]; }
void set_card(index_type c) const { card_ = c; card_uptodate = true; }
void unset_card() const { card_uptodate = false; }
- index_type card(bool just_look=false) const {
+ index_type card(bool just_look=false) const {
if (!card_uptodate || just_look) {
index_type c = index_type(std::count_if(m.begin(), m.end(), [](const auto &x) {return x == true;}));
- if (just_look) return c;
- card_ = c;
+ if (just_look) return c;
+ card_ = c;
}
return card_;
}
index_type pos(tensor_ranges& global_r) const {
index_type p = 0;
- for (index_type i=0; i < r.size(); ++i)
- p+= s[i]*global_r[idxs[i]];
+ for (index_type i=0; i < r.size(); ++i)
+ p+= s[i]*global_r[idxs[i]];
return p;
}
index_type lpos(tensor_ranges& local_r) const {
index_type p = 0;
- for (index_type i=0; i < r.size(); ++i)
- p+= s[i]*local_r[i];
+ for (index_type i=0; i < r.size(); ++i)
+ p+= s[i]*local_r[i];
return p;
}
bool operator()(tensor_ranges& global_r) const {
@@ -212,7 +212,7 @@ namespace bgeot {
eval_strides();
}
explicit tensor_mask(index_type range, Slice slice) {
- set_slice(slice.dim, range, slice.i0);
+ set_slice(slice.dim, range, slice.i0);
}
struct Diagonal {
@@ -225,7 +225,7 @@ namespace bgeot {
assert(n);
r.resize(2); r[0] = r[1] = n;
idxs.resize(2); idxs[0] = dim_type(i0); idxs[1] = dim_type(i1);
- m.assign(n*n, false);
+ m.assign(n*n, false);
for (index_type i=0; i < n; ++i) m[n*i+i]=true;
set_card(n);
eval_strides();
@@ -239,7 +239,7 @@ namespace bgeot {
idxs.resize(2); idxs[0] = dim_type(i0); idxs[1] = dim_type(i1);
m.assign(n*n,false); unset_card();
for (index_type i=0; i < n; ++i)
- for (index_type j=i; j < n; ++j) m[i*n+j]=true;
+ for (index_type j=i; j < n; ++j) m[i*n+j]=true;
eval_strides();
}
void set_full(index_type dim, index_type range) {
@@ -264,7 +264,7 @@ namespace bgeot {
}
void shift_dim_num_ge(dim_type dim, int shift) {
for (dim_type i=0; i < idxs.size(); ++i) {
- if (idxs[i] >= dim) idxs[i] = dim_type(idxs[i] + shift);
+ if (idxs[i] >= dim) idxs[i] = dim_type(idxs[i] + shift);
}
check_assertions();
}
@@ -273,7 +273,7 @@ namespace bgeot {
p.resize(card());
index_type i = 0;
for (tensor_ranges_loop l(r); !l.finished(); l.next()) {
- if (m[lpos(l.cnt)]) p[i++] = lpos(l.cnt);
+ if (m[lpos(l.cnt)]) p[i++] = lpos(l.cnt);
}
assert(i==card());
}
@@ -297,15 +297,15 @@ namespace bgeot {
struct tensor_index_to_mask {
short_type mask_num;
short_type mask_dim;
- tensor_index_to_mask() : mask_num(short_type(-1)),
- mask_dim(short_type(-1)) {}
- bool is_valid() { return mask_num != short_type(-1) &&
- mask_dim != short_type(-1); }
+ tensor_index_to_mask() : mask_num(short_type(-1)),
+ mask_dim(short_type(-1)) {}
+ bool is_valid() { return mask_num != short_type(-1) &&
+ mask_dim != short_type(-1); }
};
- /*
- defines a "shape" of a sparse tensor
+ /*
+ defines a "shape" of a sparse tensor
the merge function allows unions / intersections between these shapes
*/
class tensor_shape {
@@ -317,32 +317,32 @@ namespace bgeot {
(the tensor is identically zero) */
void check_empty_mask() {
if (card() == 0) {
- for (dim_type i=0; i < masks_.size(); ++i) {
- masks_[i].set_zero();
- }
+ for (dim_type i=0; i < masks_.size(); ++i) {
+ masks_[i].set_zero();
+ }
}
}
- static void find_linked_masks(dim_type mnum, const tensor_shape &ts1, const tensor_shape &ts2,
- dal::bit_vector& treated1, dal::bit_vector& treated2,
- std::vector<const tensor_mask*>& lstA,
- std::vector<const tensor_mask*>& lstB) {
+ static void find_linked_masks(dim_type mnum, const tensor_shape &ts1, const tensor_shape &ts2,
+ dal::bit_vector& treated1, dal::bit_vector& treated2,
+ std::vector<const tensor_mask*>& lstA,
+ std::vector<const tensor_mask*>& lstB) {
// beware of infinite loops if none of the indices is valid
assert(mnum < ts1.masks().size());
assert(!treated1[mnum]);
treated1.add(mnum);
lstA.push_back(&ts1.mask(mnum));
for (dim_type i=0; i < ts1.mask(mnum).indexes().size(); ++i) {
- dim_type ii = ts1.mask(mnum).indexes()[i];
- if (ts2.index_is_valid(ii) && !treated2[ts2.index_to_mask_num(ii)])
- find_linked_masks(ts2.index_to_mask_num(ii),ts2,ts1,treated2,treated1,lstB,lstA);
+ dim_type ii = ts1.mask(mnum).indexes()[i];
+ if (ts2.index_is_valid(ii) && !treated2[ts2.index_to_mask_num(ii)])
+ find_linked_masks(ts2.index_to_mask_num(ii),ts2,ts1,treated2,treated1,lstB,lstA);
}
}
protected:
- dim_type index_to_mask_num(dim_type ii) const {
+ dim_type index_to_mask_num(dim_type ii) const {
if (index_is_valid(ii))
- return dim_type(idx2mask[ii].mask_num); else return dim_type(-1);
+ return dim_type(idx2mask[ii].mask_num); else return dim_type(-1);
}
public:
void clear() { masks_.resize(0); idx2mask.resize(0); }
@@ -351,39 +351,38 @@ namespace bgeot {
masks_.swap(ts.masks_);
}
dim_type ndim() const { return dim_type(idx2mask.size()); }
- bool index_is_valid(dim_type ii) const {
- assert(ii < idx2mask.size()); return idx2mask[ii].is_valid();
+ bool index_is_valid(dim_type ii) const {
+ assert(ii < idx2mask.size()); return idx2mask[ii].is_valid();
}
- const tensor_mask& index_to_mask(dim_type ii) const {
- assert(index_is_valid(ii)); return masks_[idx2mask[ii].mask_num];
+ const tensor_mask& index_to_mask(dim_type ii) const {
+ assert(index_is_valid(ii)); return masks_[idx2mask[ii].mask_num];
}
- dim_type index_to_mask_dim(dim_type ii) const {
- assert(index_is_valid(ii)); return dim_type(idx2mask[ii].mask_dim);
+ dim_type index_to_mask_dim(dim_type ii) const {
+ assert(index_is_valid(ii)); return dim_type(idx2mask[ii].mask_dim);
}
- index_type dim(dim_type ii) const
- { assert(index_is_valid(ii)); return index_to_mask(ii).ranges()[index_to_mask_dim(ii)];
+ index_type dim(dim_type ii) const
+ { assert(index_is_valid(ii)); return index_to_mask(ii).ranges()[index_to_mask_dim(ii)];
}
tensor_mask_container& masks() { return masks_; }
const tensor_mask_container& masks() const { return masks_; }
const tensor_mask& mask(dim_type i) const { assert(i<masks_.size()); return masks_[i]; }
- stride_type card(bool just_look=false) const {
- stride_type n = 1;
- for (dim_type i=0; i < masks().size(); ++i)
- n *= masks()[i].card(just_look);
- return n;
- }
+ stride_type card(bool just_look=false) const {
+ stride_type n = 1;
+ for (dim_type i=0; i < masks().size(); ++i)
+ n *= masks()[i].card(just_look);
+ return n;
+ }
void push_mask(const tensor_mask& m) { masks_.push_back(m); update_idx2mask(); }
void remove_mask(dim_type mdim) { /* be careful with this function.. remove
- only useless mask ! */
+ only useless mask ! */
masks_.erase(masks_.begin()+mdim);
update_idx2mask();
}
void remove_unused_dimensions() {
dim_type nd = 0;
for (dim_type i=0; i < ndim(); ++i) {
- if (index_is_valid(i)) {
- masks_[idx2mask[i].mask_num].indexes()[idx2mask[i].mask_dim] = nd++;
- }
+ if (index_is_valid(i))
+ masks_[idx2mask[i].mask_num].indexes()[idx2mask[i].mask_dim] = nd++;
}
set_ndim_noclean(nd);
update_idx2mask();
@@ -391,26 +390,25 @@ namespace bgeot {
void update_idx2mask() const {
/*
- dim_type N=0;
- for (dim_type i=0; i < masks_.size(); ++i) {
- N = std::max(N, std::max_element(masks_.indexes().begin(), masks_.indexes.end()));
- }
- idx2mask.resize(N);
+ dim_type N=0;
+ for (dim_type i=0; i < masks_.size(); ++i)
+ N = std::max(N, std::max_element(masks_.indexes().begin(), masks_.indexes.end()));
+ idx2mask.resize(N);
*/
std::fill(idx2mask.begin(), idx2mask.end(), tensor_index_to_mask());
for (dim_type i=0; i < masks_.size(); ++i) {
- for (dim_type j=0; j < masks_[i].indexes().size(); ++j) {
- dim_type k = masks_[i].indexes()[j];
- GMM_ASSERT3(k < idx2mask.size() && !idx2mask[k].is_valid(), "");
- idx2mask[k].mask_num = i; idx2mask[k].mask_dim = j;
- }
+ for (dim_type j=0; j < masks_[i].indexes().size(); ++j) {
+ dim_type k = masks_[i].indexes()[j];
+ GMM_ASSERT3(k < idx2mask.size() && !idx2mask[k].is_valid(), "");
+ idx2mask[k].mask_num = i; idx2mask[k].mask_dim = j;
+ }
}
}
- void assign_shape(const tensor_shape& other) {
+ void assign_shape(const tensor_shape& other) {
masks_ = other.masks_;
idx2mask = other.idx2mask;
- // update_idx2mask();
+ // update_idx2mask();
}
void set_ndim(dim_type n) {
clear();
@@ -419,7 +417,7 @@ namespace bgeot {
void set_ndim_noclean(dim_type n) {idx2mask.resize(n);}
tensor_shape() {}
-
+
/* create an "empty" shape of dimensions nd */
explicit tensor_shape(dim_type nd) : idx2mask(nd,tensor_index_to_mask()) {
masks_.reserve(16);
@@ -435,7 +433,7 @@ namespace bgeot {
update_idx2mask();
}
- void set_empty(const tensor_ranges& r) {
+ void set_empty(const tensor_ranges& r) {
idx2mask.resize(r.size());
masks_.resize(r.size());
for (dim_type i=0; i < r.size(); ++i) masks_[i].set_empty(i,r[i]);
@@ -448,27 +446,27 @@ namespace bgeot {
/* a few basic checks */
GMM_ASSERT3(ts2.ndim() == ndim(), "");
if (ts2.ndim()==0) return; /* it is a scalar */
- for (dim_type i = 0; i < ndim(); ++i)
- if (index_is_valid(i) && ts2.index_is_valid(i))
- GMM_ASSERT3(ts2.dim(i) == dim(i), "");
+ for (dim_type i = 0; i < ndim(); ++i)
+ if (index_is_valid(i) && ts2.index_is_valid(i))
+ GMM_ASSERT3(ts2.dim(i) == dim(i), "");
tensor_mask_container new_mask;
dal::bit_vector mask_treated1; mask_treated1.sup(0,masks().size());
dal::bit_vector mask_treated2; mask_treated2.sup(0,ts2.masks().size());
std::vector<const tensor_mask*> lstA, lstB; lstA.reserve(10); lstB.reserve(10);
for (dim_type i = 0; i < ndim(); ++i) {
- dim_type i1 = dim_type(index_to_mask_num(i));
- dim_type i2 = dim_type(ts2.index_to_mask_num(i));
- lstA.clear(); lstB.clear();
- if (index_is_valid(i) && !mask_treated1[i1])
- find_linked_masks(i1, *this, ts2, mask_treated1, mask_treated2,
- lstA, lstB);
- else if (ts2.index_is_valid(i) && !mask_treated2[i2])
- find_linked_masks(i2, ts2, *this, mask_treated2, mask_treated1,
- lstB, lstA);
- else continue;
- GMM_ASSERT3(lstA.size() || lstB.size(), "");
- new_mask.push_back(tensor_mask(lstA,lstB,and_op));
+ dim_type i1 = dim_type(index_to_mask_num(i));
+ dim_type i2 = dim_type(ts2.index_to_mask_num(i));
+ lstA.clear(); lstB.clear();
+ if (index_is_valid(i) && !mask_treated1[i1])
+ find_linked_masks(i1, *this, ts2, mask_treated1, mask_treated2,
+ lstA, lstB);
+ else if (ts2.index_is_valid(i) && !mask_treated2[i2])
+ find_linked_masks(i2, ts2, *this, mask_treated2, mask_treated1,
+ lstB, lstA);
+ else continue;
+ GMM_ASSERT3(lstA.size() || lstB.size(), "");
+ new_mask.push_back(tensor_mask(lstA,lstB,and_op));
}
masks_ = new_mask;
update_idx2mask();
@@ -476,9 +474,8 @@ namespace bgeot {
}
void shift_dim_num_ge(dim_type dim_num, int shift) {
- for (dim_type m = 0; m < masks().size(); ++m) {
- masks()[m].shift_dim_num_ge(dim_num,shift);
- }
+ for (dim_type m = 0; m < masks().size(); ++m)
+ masks()[m].shift_dim_num_ge(dim_num,shift);
}
/* the permutation vector might be greater than the current ndim,
in which case some indexes will be unused (when p[i] == dim_type(-1))
@@ -488,19 +485,21 @@ namespace bgeot {
/* build the inverse permutation and check that this IS really a permutation */
for (dim_type i=0; i < p.size(); ++i) {
- if (p[i] != dim_type(-1)) { assert(invp[p[i]] == dim_type(-1)); invp[p[i]] = i; }
+ if (p[i] != dim_type(-1)) {
+ assert(invp[p[i]] == dim_type(-1));
+ invp[p[i]] = i;
+ }
}
for (dim_type i=0; i < invp.size(); ++i) assert(invp[i] != dim_type(-1));
-
+
/* do the permutation (quite simple!) */
for (dim_type m=0; m < masks().size(); ++m) {
- for (dim_type i=0; i < masks()[m].indexes().size(); ++i) {
- if (!revert) {
- masks()[m].indexes()[i] = invp[masks()[m].indexes()[i]];
- } else {
- masks()[m].indexes()[i] = p[masks()[m].indexes()[i]];
- }
- }
+ for (dim_type i=0; i < masks()[m].indexes().size(); ++i) {
+ if (!revert)
+ masks()[m].indexes()[i] = invp[masks()[m].indexes()[i]];
+ else
+ masks()[m].indexes()[i] = p[masks()[m].indexes()[i]];
+ }
}
set_ndim_noclean(dim_type(p.size()));
update_idx2mask();
@@ -533,7 +532,8 @@ namespace bgeot {
ts2.masks.resize(1);
ts2.masks[0].set_diagonal(idx[i0].n, i0, i1);
ts2.idx[i0].mask_num = ts2.idx[i1].mask_num = 0;
- ts2.idx[i0].mask_dim = 0; ts2.idx[i1].mask_dim = 1;
+ ts2.idx[i0].mask_dim = 0;
+ ts2.idx[i1].mask_dim = 1;
}
*/
void print(std::ostream& o) const;
@@ -541,7 +541,7 @@ namespace bgeot {
};
- /* reference to a tensor:
+ /* reference to a tensor:
- a shape
- a data pointer
- a set of strides
@@ -549,15 +549,15 @@ namespace bgeot {
class tensor_ref : public tensor_shape {
std::vector< tensor_strides > strides_;
TDIter *pbase_; /* pointer to a pointer designating the data;
- this allows changing the base for a whole series of tensor_ref objects at once */
+ this allows changing the base for a whole series
+ of tensor_ref objects at once */
stride_type base_shift_;
void remove_mask(dim_type mdim) {
tensor_shape::remove_mask(mdim);
assert(strides_[mdim].size() == 0 ||
- (strides_[mdim].size() == 1 && strides_[mdim][0] == 0)); /* sanity check.. */
+ (strides_[mdim].size() == 1 && strides_[mdim][0] == 0)); /* sanity check.. */
strides_.erase(strides_.begin()+mdim);
}
public:
@@ -576,16 +576,17 @@ namespace bgeot {
void clear() { strides_.resize(0); pbase_ = 0; base_shift_ = 0;
tensor_shape::clear(); }
-
+
/* makes sure that the stride of the first index is always equal to zero
*/
void ensure_0_stride() {
for (index_type i=0; i < strides_.size(); ++i) {
- if (strides_[i].size() >= 1 && strides_[i][0] != 0) {
- stride_type s = strides_[i][0];
- base_shift_ += s;
- for (index_type j=0; j < strides_[i].size(); ++j) strides_[i][j] -= s;
- }
+ if (strides_[i].size() >= 1 && strides_[i][0] != 0) {
+ stride_type s = strides_[i][0];
+ base_shift_ += s;
+ for (index_type j=0; j < strides_[i].size(); ++j)
+ strides_[i][j] -= s;
+ }
}
}
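The loop above maintains the invariant that every stride table starts at
zero, folding any nonzero first stride into base_shift_. A minimal sketch
of the same normalization on a single stride table (hypothetical helper,
assuming a signed stride type):

  #include <cstddef>
  #include <vector>

  typedef long stride_type;

  // Shift a stride table so its first entry is zero, accumulating the
  // removed offset into base_shift, as ensure_0_stride does per index.
  void normalize_strides(std::vector<stride_type> &strides,
                         stride_type &base_shift) {
    if (!strides.empty() && strides[0] != 0) {
      stride_type s = strides[0];
      base_shift += s;                    // offset moves to the base
      for (std::size_t j = 0; j < strides.size(); ++j)
        strides[j] -= s;                  // strides[0] becomes zero
    }
  }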
@@ -595,7 +596,7 @@ namespace bgeot {
strides_.reserve(16);
init_strides();
}
- explicit tensor_ref(const tensor_ranges& r, TDIter *pbase__=0)
+ explicit tensor_ref(const tensor_ranges& r, TDIter *pbase__=0)
: tensor_shape(r), pbase_(pbase__), base_shift_(0) {
strides_.reserve(16);
init_strides();
@@ -604,10 +605,11 @@ namespace bgeot {
strides_.resize(masks().size());
stride_type s = 1;
for (dim_type i = 0; i < strides_.size(); ++i) {
- index_type n = mask(i).card();
- strides_[i].resize(n);
- for (index_type j=0;j<n;++j) strides_[i][j] = j*s;
- s *= n;
+ index_type n = mask(i).card();
+ strides_[i].resize(n);
+ for (index_type j=0;j<n;++j)
+ strides_[i][j] = j*s;
+ s *= n;
}
}
tensor_ref() : pbase_(0), base_shift_(0) { strides_.reserve(16); }
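init_strides above lays the tensor out with the first index varying
fastest: position j of dimension i gets stride j*s, and s grows by the
cardinality of each mask. The same construction over plain cardinalities
(hypothetical helper build_strides):

  #include <cstddef>
  #include <vector>

  typedef unsigned index_type;
  typedef long stride_type;

  std::vector< std::vector<stride_type> >
  build_strides(const std::vector<index_type> &card) {
    std::vector< std::vector<stride_type> > strides(card.size());
    stride_type s = 1;
    for (std::size_t i = 0; i < card.size(); ++i) {
      strides[i].resize(card[i]);
      for (index_type j = 0; j < card[i]; ++j)
        strides[i][j] = j * s;            // first index varies fastest
      s *= stride_type(card[i]);
    }
    return strides;   // card = {3,4} gives {0,1,2} and {0,3,6,9}
  }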
@@ -636,7 +638,7 @@ namespace bgeot {
void print_() const { print(cerr); }
};
-
+
std::ostream& operator<<(std::ostream& o, const tensor_mask& m);
std::ostream& operator<<(std::ostream& o, const tensor_shape& ts);
std::ostream& operator<<(std::ostream& o, const tensor_ref& tr);
@@ -660,9 +662,9 @@ namespace bgeot {
}
stride_type mean_increm; /* mean value of the increment (used for sorting) */
tensor_strides inc; /* not strides but increments to the next index value,
- with inc[range-1] == -sum(inc[0..range-2]) (automatic rewinding!)
- of course, stride_type MUST be signed
- */
+ with inc[range-1] == -sum(inc[0..range-2]) (automatic rewinding!)
+ of course, stride_type MUST be signed
+ */
std::bitset<32> have_regular_strides;
};
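The rewinding convention documented above can be checked in isolation:
because the last increment equals minus the sum of the others, repeatedly
adding the whole table returns to the starting offset with no explicit
reset. A self-contained check with hypothetical values:

  #include <cassert>

  int main() {
    long pos = 0;
    const long inc[] = {2, 3, -5};  // visits 0 -> 2 -> 5, then rewinds;
                                    // inc[2] == -(2+3)
    for (int pass = 0; pass < 2; ++pass)
      for (int k = 0; k < 3; ++k)
        pos += inc[k];
    assert(pos == 0);               // back at the start after each pass
    return 0;
  }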
@@ -681,24 +683,24 @@ namespace bgeot {
struct index_value_data {
dim_type cnt_num;
const stride_type **ppinc; /* points to pr[cnt_num].pinc, initialized by rewind()
- and not before (otherwise copying a multi_tensor_iterator would cause problems);
- makes it possible to deduce the counter value: (*ppinc - pincbase) (to be divided by nn=(pri[cnt_num].n-N))
- */
+ and not before (otherwise copying a multi_tensor_iterator would cause problems);
+ makes it possible to deduce the counter value: (*ppinc - pincbase) (to be divided by nn=(pri[cnt_num].n-N))
+ */
const stride_type *pincbase;
const stride_type *pposbase; /* points into pri[cnt_num].mask_pos,
- recovers the position in the mask from the counter deduced above and the div and mod fields below */
+ recovers the position in the mask from the counter deduced above and the div and mod fields below */
index_type div, mod, nn;
stride_type pos_; /* stores the position when the index is not part of the pri array
- (hence the index only has 1 value, and ppos == &pos_, and pcnt = &zero */
+ (hence the index only has 1 value, and ppos == &pos_, and pcnt = &zero */
};
std::vector<index_value_data> idxval;
std::vector<stride_type> vectorized_strides_; /* if the tensor has regular strides, the mti might be vectorizable */
- index_type vectorized_size_; /* the size of each vectorizable chunk */
- index_type vectorized_pr_dim; /* all pr[i], i >= vectorized_pr_dim, can be accessed via vectorized_strides */
+ index_type vectorized_size_; /* the size of each vectorizable chunk */
+ index_type vectorized_pr_dim; /* all pr[i], i >= vectorized_pr_dim, can be accessed via vectorized_strides */
public:
- void clear() {
- N = 0; pr.clear(); pri.clear(); bloc_rank.clear(); bloc_nelt.clear();
- it.clear(); pit0.clear(); itbase.clear(); idxval.clear();
+ void clear() {
+ N = 0; pr.clear(); pri.clear(); bloc_rank.clear(); bloc_nelt.clear();
+ it.clear(); pit0.clear(); itbase.clear(); idxval.clear();
}
void swap(multi_tensor_iterator& m) {
std::swap(N,m.N); pr.swap(m.pr); pri.swap(m.pri);
@@ -706,24 +708,26 @@ namespace bgeot {
it.swap(m.it); pit0.swap(m.pit0); itbase.swap(m.itbase);
idxval.swap(m.idxval);
}
- void rewind() {
- for (dim_type i=0; i < pr.size(); ++i) {
- pr[i].pinc = pr[i].begin = &pri[i].inc[0]; pr[i].end = pr[i].begin+pri[i].inc.size();
+ void rewind() {
+ for (dim_type i=0; i < pr.size(); ++i) {
+ pr[i].pinc = pr[i].begin = &pri[i].inc[0];
+ pr[i].end = pr[i].begin+pri[i].inc.size();
}
- for (dim_type n=0; n < N; ++n) it[n] = *(pit0[n]) + itbase[n];
+ for (dim_type n=0; n < N; ++n)
+ it[n] = *(pit0[n]) + itbase[n];
for (dim_type i=0; i < idxval.size(); ++i) {
- if (idxval[i].cnt_num != dim_type(-1)) {
- idxval[i].ppinc = &pr[idxval[i].cnt_num].pinc;
- idxval[i].pincbase = &pri[idxval[i].cnt_num].inc[0];
- idxval[i].pposbase = &pri[idxval[i].cnt_num].mask_pos[0];
- idxval[i].nn = (N-pri[idxval[i].cnt_num].n);
- } else {
- static const stride_type *null=0;
- idxval[i].ppinc = &null;
- idxval[i].pincbase = 0;
- idxval[i].pposbase = &idxval[i].pos_;
- idxval[i].nn = 1;
- }
+ if (idxval[i].cnt_num != dim_type(-1)) {
+ idxval[i].ppinc = &pr[idxval[i].cnt_num].pinc;
+ idxval[i].pincbase = &pri[idxval[i].cnt_num].inc[0];
+ idxval[i].pposbase = &pri[idxval[i].cnt_num].mask_pos[0];
+ idxval[i].nn = (N-pri[idxval[i].cnt_num].n);
+ } else {
+ static const stride_type *null=0;
+ idxval[i].ppinc = &null;
+ idxval[i].pincbase = 0;
+ idxval[i].pposbase = &idxval[i].pos_;
+ idxval[i].nn = 1;
+ }
}
}
dim_type ndim() const { return dim_type(idxval.size()); }
@@ -738,15 +742,16 @@ namespace bgeot {
bool next(unsigned i_stop = unsigned(-1), unsigned i0_ = unsigned(-2))
{//=pr.size()-1) {
unsigned i0 = unsigned(i0_ == unsigned(-2) ? pr.size()-1 : i0_);
while (i0 != i_stop) {
- for (unsigned n = pr[i0].n; n < N; ++n) {
- // index_type pos = pr[i0].cnt * (N-pri[i0].n) + (n - pri[i0].n);
- it[n] += *pr[i0].pinc; pr[i0].pinc++;
- }
- if (pr[i0].pinc != pr[i0].end) {
- return true;
- } else {
- pr[i0].pinc = pr[i0].begin; i0--;
- }
+ for (unsigned n = pr[i0].n; n < N; ++n) {
+ // index_type pos = pr[i0].cnt * (N-pri[i0].n) + (n - pri[i0].n);
+ it[n] += *pr[i0].pinc; pr[i0].pinc++;
+ }
+ if (pr[i0].pinc != pr[i0].end) {
+ return true;
+ } else {
+ pr[i0].pinc = pr[i0].begin;
+ i0--;
+ }
}
return false;
}
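The control flow of next() above is an odometer over the packed ranges:
advance the innermost range and, when its increment table is exhausted,
rewind it and carry into the next one. The same logic on plain counters
(hypothetical sketch, not the increment-table version):

  #include <cstddef>
  #include <vector>

  bool odometer_next(std::vector<unsigned> &cnt,
                     const std::vector<unsigned> &range) {
    std::size_t i0 = cnt.size();
    while (i0-- > 0) {
      if (++cnt[i0] < range[i0]) return true;  // advanced, no carry
      cnt[i0] = 0;                             // rewind, carry on
    }
    return false;                              // all ranges exhausted
  }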
@@ -759,27 +764,29 @@ namespace bgeot {
if (pr.size() == 0) return false;
std::vector<packed_range>::reverse_iterator p_ = pr.rbegin();
while (p_!=pr.rend()) {
- it[0] += *(p_->pinc++);
- if (p_->pinc != p_->end) {
- return true;
- } else {
- p_->pinc = p_->begin; p_++;
- }
+ it[0] += *(p_->pinc++);
+ if (p_->pinc != p_->end) {
+ return true;
+ } else {
+ p_->pinc = p_->begin;
+ p_++;
+ }
}
return false;
}
- bool qnext2() {
+ bool qnext2() {
if (pr.size() == 0) return false;
std::vector<packed_range>::reverse_iterator p_ = pr.rbegin();
while (p_!=pr.rend()) {
- it[0] += *(p_->pinc++);
- it[1] += *(p_->pinc++);
- if (p_->pinc != p_->end) {
- return true;
- } else {
- p_->pinc = p_->begin; p_++;
- }
+ it[0] += *(p_->pinc++);
+ it[1] += *(p_->pinc++);
+ if (p_->pinc != p_->end) {
+ return true;
+ } else {
+ p_->pinc = p_->begin;
+ p_++;
+ }
}
return false;
}
@@ -802,8 +809,8 @@ namespace bgeot {
multi_tensor_iterator m(tr0, with_index_values);
swap(m);
}
- multi_tensor_iterator(const tensor_ref& tr0,
- const tensor_ref& tr1, bool with_index_values) {
+ multi_tensor_iterator(const tensor_ref& tr0,
+ const tensor_ref& tr1, bool with_index_values) {
std::vector<tensor_ref> trtab(2); trtab[0] = tr0; trtab[1] = tr1;
init(trtab, with_index_values);
}
@@ -811,9 +818,9 @@ namespace bgeot {
multi_tensor_iterator m(tr0, tr1, with_index_values);
swap(m);
}
- multi_tensor_iterator(const tensor_ref& tr0,
- const tensor_ref& tr1,
- const tensor_ref& tr2, bool with_index_values) {
+ multi_tensor_iterator(const tensor_ref& tr0,
+ const tensor_ref& tr1,
+ const tensor_ref& tr2, bool with_index_values) {
std::vector<tensor_ref> trtab(3); trtab[0] = tr0; trtab[1] = tr1;
trtab[2] = tr2;
init(trtab, with_index_values);
}
@@ -828,9 +835,9 @@ namespace bgeot {
/* handles a tree of reductions
The tree is used if more than two tensors are reduced, i.e.
- z(:,:)=t(:,i).u(i,j).v(j,:)
+ z(:,:)=t(:,i).u(i,j).v(j,:)
in that case, the reduction against j can be performed on u(:,j).v(j,:) = w(:,:)
- and then, z(:,:) = t(:,i).w(i,:)
+ and then, z(:,:) = t(:,i).w(i,:)
*/
struct tensor_reduction {
struct tref_or_reduction {
@@ -838,24 +845,24 @@ namespace bgeot {
std::shared_ptr<tensor_reduction> reduction;
tensor_ref &tr() { return tr_; }
const tensor_ref &tr() const { return tr_; }
- explicit tref_or_reduction(const tensor_ref &tr__, const std::string& s)
- : tr_(tr__), ridx(s) {}
- explicit tref_or_reduction(const std::shared_ptr<tensor_reduction> &p, const std::string& s)
- : reduction(p), ridx(s) {
- reduction->result(tr_);
+ explicit tref_or_reduction(const tensor_ref &tr__, const std::string& s)
+ : tr_(tr__), ridx(s) {}
+ explicit tref_or_reduction(const std::shared_ptr<tensor_reduction> &p, const std::string& s)
+ : reduction(p), ridx(s) {
+ reduction->result(tr_);
}
bool is_reduction() const { return reduction != 0; }
void swap(tref_or_reduction &other) { tr_.swap(other.tr_);
std::swap(reduction, other.reduction); }
std::string ridx; /* reduction indexes, no index can appear
- twice in the same tensor */
+ twice in the same tensor */
std::vector<dim_type> gdim; /* mapping to the global virtual
- tensor whose range is the
- union of the ranges of each
- reduced tensor */
+ tensor whose range is the
+ union of the ranges of each
+ reduced tensor */
std::vector<dim_type> rdim; /* mapping to the dimensions of the
- reduced tensor ( = dim_type(-1) for
- dimensions i s.t. ridx[i] != ' ' ) */
-
+ reduced tensor ( = dim_type(-1) for
+ dimensions i s.t. ridx[i] != ' ' ) */
+
};
tensor_ranges reduced_range;
std::string reduction_chars; /* list of all indexes used for reduction */
@@ -877,10 +884,9 @@ namespace bgeot {
*/
static void diag_shape(tensor_shape& ts, const std::string& s) {
for (index_type i=0; i < s.length(); ++i) {
- size_type pos = s.find(s[i]);
- if (s[i] != ' ' && pos != i) { // not a new index => reduction on the diagonal
- ts = ts.diag_shape(tensor_mask::Diagonal(dim_type(pos),dim_type(i)));
- }
+ size_type pos = s.find(s[i]);
+ if (s[i] != ' ' && pos != i) // not a new index => reduction on the diagonal
+ ts = ts.diag_shape(tensor_mask::Diagonal(dim_type(pos),dim_type(i)));
}
}
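diag_shape above treats a repeated index letter in s as a diagonal
constraint between the first occurrence and the current position. A
hypothetical helper that merely collects those pairs illustrates the
convention:

  #include <cstddef>
  #include <string>
  #include <utility>
  #include <vector>

  // Collect the (first_pos, i) pairs that diag_shape turns into
  // Diagonal masks; blanks and first occurrences add no constraint.
  std::vector< std::pair<std::size_t, std::size_t> >
  diagonal_pairs(const std::string &s) {
    std::vector< std::pair<std::size_t, std::size_t> > d;
    for (std::size_t i = 0; i < s.length(); ++i) {
      std::size_t pos = s.find(s[i]);
      if (s[i] != ' ' && pos != i)          // repeated index letter
        d.push_back(std::make_pair(pos, i));
    }
    return d;                               // "iji" -> {(0,2)}
  }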
diff --git a/src/getfem_mat_elem.cc b/src/getfem_mat_elem.cc
index f99b628b..66557426 100644
--- a/src/getfem_mat_elem.cc
+++ b/src/getfem_mat_elem.cc
@@ -386,18 +386,20 @@ namespace getfem {
es_end.resize(0); es_end.resize(pme->size());
Vtab.resize(pme->size());
size_type nm = 0;
- if (first) memset(&(*t.begin()), 0, t.size()*sizeof(*t.begin())); //std::fill(t.begin(), t.end(), 0.0);
+ if (first)
+ memset(&(*t.begin()), 0, t.size()*sizeof(*t.begin())); //std::fill(t.begin(), t.end(), 0.0);
for (k = 0, nm = 0; k < pme->size(); ++k) {
if (elmt_stored[k].size() != 1) {
es_beg[nm] = elmt_stored[k].begin();
es_end[nm] = elmt_stored[k].end();
pts[nm] = elmt_stored[k].begin();
++nm;
- } else J *= elmt_stored[k][0];
+ } else
+ J *= elmt_stored[k][0];
}
- if (nm == 0) {
+ if (nm == 0)
t[0] += J;
- } else {
+ else {
BLAS_INT n0 = BLAS_INT(es_end[0] - es_beg[0]);
base_tensor::const_iterator pts0 = pts[0];
@@ -409,8 +411,8 @@ namespace getfem {
for (V = Vtab[k]; k; --k)
Vtab[k-1] = V = *pts[k] * V;
GMM_ASSERT1(pt+n0 <= t.end(), "Internal error");
- gmm::daxpy_(&n0, &V, const_cast<double*>(&(pts0[0])), &one,
- (double*)&(*pt), &one);
+ gmm::daxpy_(&n0, &V, const_cast<double*>(&(pts0[0])), &one,
+ (double*)&(*pt), &one);
pt+=n0;
for (k=1; k != nm && ++pts[k] == es_end[k]; ++k)
pts[k] = es_beg[k];
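The kernel above folds the scalar weight V, the product of the current
entries of all non-trivial stored tensors, into an n0-long slice of t
through daxpy_. The same slice update written out by hand (schematic
only):

  #include <cstddef>

  // t[pt .. pt+n0) += V * pts0[0 .. n0), i.e. one daxpy_ call.
  void axpy_slice(std::size_t n0, double V,
                  const double *src, double *dst) {
    for (std::size_t i = 0; i < n0; ++i)
      dst[i] += V * src[i];
  }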
@@ -422,7 +424,8 @@ namespace getfem {
void pre_tensors_for_linear_trans(bool volumic) const {
- if ((volumic && volume_computed) || (!volumic && faces_computed)) return;
+ if ((volumic && volume_computed) || (!volumic && faces_computed))
+ return;
// scalar_type exectime = ftool::uclock_sec();
bgeot::multi_index sizes = pme->sizes(0), mi(sizes.size());
@@ -538,7 +541,7 @@ namespace getfem {
dim_type P = dim_type(dim), N = dim_type(G.nrows());
short_type NP = short_type(pgt->nb_points());
fem_interpolation_context ctx(pgp, 0, 0, G, elt,
- short_type(ir-1));
+ short_type(ir-1));
GMM_ASSERT1(G.ncols() == NP, "dimensions mismatch");
if (ir > 0) {
@@ -645,7 +648,7 @@ namespace getfem {
bool prefer_comp_on_real_element) {
dal::pstatic_stored_object_key
pk = std::make_shared<emelem_comp_key_>(pm, pi, pg,
- prefer_comp_on_real_element);
+ prefer_comp_on_real_element);
dal::pstatic_stored_object o = dal::search_stored_object(pk);
if (o) return std::dynamic_pointer_cast<const mat_elem_computation>(o);
pmat_elem_computation
diff --git a/src/gmm/gmm_blas.h b/src/gmm/gmm_blas.h
index b8390976..66531e29 100644
--- a/src/gmm/gmm_blas.h
+++ b/src/gmm/gmm_blas.h
@@ -65,7 +65,8 @@ namespace gmm {
{ linalg_traits<L>::do_clear(linalg_const_cast(l)); }
///@endcond
- /** count the number of non-zero entries of a vector or matrix. */ template <typename L> inline size_type nnz(const L& l)
+ /** count the number of non-zero entries of a vector or matrix. */
+ template <typename L> inline size_type nnz(const L& l)
{ return nnz(l, typename linalg_traits<L>::linalg_type()); }
///@cond DOXY_SHOW_ALL_FUNCTIONS
@@ -1273,8 +1274,8 @@ namespace gmm {
@param l2 contains on output, l2+l1.
*/
template <typename L1, typename L2> inline
- void add(const L1& l1, L2& l2) {
- add_spec(l1, l2, typename linalg_traits<L2>::linalg_type());
+ void add(const L1& l1, L2& l2) {
+ add_spec(l1, l2, typename linalg_traits<L2>::linalg_type());
}
///@cond
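Both nnz and add above are thin forwarders using tag dispatch: the
trailing linalg_type() argument selects the specialized overload at
compile time. A minimal sketch of the idiom with hypothetical names
(my_traits, my_nnz):

  #include <cstddef>
  #include <vector>

  struct abstract_vector {};                 // dispatch tag

  template <typename L>
  struct my_traits { typedef abstract_vector linalg_type; };

  template <typename L>
  std::size_t nnz_impl(const L &l, abstract_vector)
  { return l.size(); }                       // vector specialization

  template <typename L>
  std::size_t my_nnz(const L &l)
  { return nnz_impl(l, typename my_traits<L>::linalg_type()); }
  // my_nnz(std::vector<double>(5)) == 5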
diff --git a/src/gmm/gmm_lapack_interface.h b/src/gmm/gmm_lapack_interface.h
index e2a5f129..135721fb 100644
--- a/src/gmm/gmm_lapack_interface.h
+++ b/src/gmm/gmm_lapack_interface.h
@@ -95,11 +95,11 @@ namespace gmm {
void sgeesx_(...); void dgeesx_(...); void cgeesx_(...); void zgeesx_(...);
void sgesvd_(...); void dgesvd_(...); void cgesvd_(...); void zgesvd_(...);
}
-
+
/* ********************************************************************** */
/* LU decomposition. */
/* ********************************************************************** */
-
+
# define getrf_interface(lapack_name, base_type) inline \
size_type lu_factor(dense_matrix<base_type> &A, lapack_ipvt &ipvt){ \
GMMLAPACK_TRACE("getrf_interface"); \
@@ -128,7 +128,7 @@ namespace gmm {
if (n) \
lapack_name(&t,&n,&nrhs,&(A(0,0)),&n,&ipvt[0],&x[0],&n,&info); \
}
-
+
# define getrs_trans_n const char t = 'N'
# define getrs_trans_t const char t = 'T'
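The getrf/getrs macros above instantiate gmm's lu_factor and lu_solve on
top of the Lapack routines. A hedged usage sketch, assuming a
Lapack-enabled build and the signatures declared above (double arguments
dispatch to dgetrf_/dgetrs_):

  #include <vector>
  #include <gmm/gmm.h>

  void solve_dense(gmm::dense_matrix<double> &A, std::vector<double> &b) {
    gmm::lapack_ipvt ipvt(gmm::mat_nrows(A));
    gmm::lu_factor(A, ipvt);        // A is overwritten by its LU factors
    std::vector<double> x(b.size());
    gmm::lu_solve(A, ipvt, x, b);   // forward/back substitution
    b = x;
  }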
@@ -184,7 +184,7 @@ namespace gmm {
GMM_ASSERT1(!info, "QR factorization failed"); \
} \
}
-
+
geqrf_interface(sgeqrf_, BLAS_S)
geqrf_interface(dgeqrf_, BLAS_D)
// For complex values, Householder vectors are not the same as in
@@ -220,7 +220,7 @@ namespace gmm {
geqrf_interface2(dgeqrf_, dorgqr_, BLAS_D)
geqrf_interface2(cgeqrf_, cungqr_, BLAS_C)
geqrf_interface2(zgeqrf_, zungqr_, BLAS_Z)
-
+
/* ********************************************************************** */
/* QR algorithm for eigenvalues search. */
/* ********************************************************************** */
@@ -332,8 +332,8 @@ namespace gmm {
geev_interface(sgeev_, BLAS_S, left)
geev_interface(dgeev_, BLAS_D, left)
geev_interface2(cgeev_, BLAS_C, left)
- geev_interface2(zgeev_, BLAS_Z, left)
-
+ geev_interface2(zgeev_, BLAS_Z, left)
+
/* ********************************************************************** */
/* SCHUR algorithm: */
@@ -402,7 +402,7 @@ namespace gmm {
/* Interface to SVD. Does not correspond to a Gmm++ functionality. */
/* Author : Sebastian Nowozin <sebastian.nowozin@tuebingen.mpg.de> */
/* ********************************************************************** */
-
+
# define gesvd_interface(lapack_name, base_type) inline \
void svd(dense_matrix<base_type> &X, \
dense_matrix<base_type> &U, \
@@ -442,7 +442,7 @@ namespace gmm {
&m, &Vtransposed(0,0), &n, &work[0], &lwork, \
&rwork[0], &info); \
}
-
+
gesvd_interface(sgesvd_, BLAS_S)
gesvd_interface(dgesvd_, BLAS_D)
cgesvd_interface(cgesvd_, BLAS_C, BLAS_S)
@@ -453,9 +453,6 @@ namespace gmm {
MAT X(X_);
svd(X, U, Vtransposed, sigma);
}
-
-
-
}
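A hedged usage sketch of the svd interface above (Lapack build assumed;
the sizes of U, Vtransposed and sigma are assumed to be set up by the
interface itself, with Vtransposed holding V^T):

  #include <vector>
  #include <gmm/gmm.h>

  void svd_example() {
    gmm::dense_matrix<double> X(4, 3), U, Vt;
    std::vector<double> sigma;
    // ... fill X ...
    gmm::svd(X, U, Vt, sigma);  // X is overwritten; the const overload
                                // above copies X first to preserve it
  }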
@@ -467,14 +464,14 @@ template <typename MAT>
void schur(const MAT &, MAT &, MAT &)
{
GMM_ASSERT1(false, "Use of function schur(A,S,Q) requires GetFEM "
- "to be built with Lapack");
+ "to be built with Lapack");
}
inline void svd(dense_matrix<BLAS_S> &, dense_matrix<BLAS_S> &,
dense_matrix<BLAS_S> &, std::vector<BLAS_S> &)
{
GMM_ASSERT1(false, "Use of function svd(X,U,Vtransposed,sigma) requires
GetFEM "
- "to be built with Lapack");
+ "to be built with Lapack");
}
inline void svd(dense_matrix<BLAS_D> &, dense_matrix<BLAS_D> &,