[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] [gnuradio] 06/06: volk: remove unused spu_lib directory from VOLK.
From: git
Subject: [Commit-gnuradio] [gnuradio] 06/06: volk: remove unused spu_lib directory from VOLK.
Date: Wed, 29 Jan 2014 00:04:26 +0000 (UTC)
This is an automated email from the git hooks/post-receive script.
trondeau pushed a commit to branch master
in repository gnuradio.
commit 19d111e2448a58e20ff5c1c80ca69751376b2544
Author: Doug Geiger <address@hidden>
Date: Tue Jan 28 17:59:35 2014 -0500
volk: remove unused spu_lib directory from VOLK.
Addresses Issues #638.
---
volk/spu_lib/gc_spu_macs.h | 380 ---------------------
volk/spu_lib/spu_16s_cmpgt_unaligned.c | 160 ---------
volk/spu_lib/spu_16s_vector_subtract_unaligned.c | 178 ----------
volk/spu_lib/spu_16s_vector_sum_unaligned.c | 178 ----------
.../spu_32fc_pointwise_multiply_unaligned.c | 222 ------------
volk/spu_lib/spu_memcpy_unaligned.c | 290 ----------------
volk/spu_lib/spu_memset_unaligned.S | 185 ----------
7 files changed, 1593 deletions(-)
diff --git a/volk/spu_lib/gc_spu_macs.h b/volk/spu_lib/gc_spu_macs.h
deleted file mode 100644
index e86dce3..0000000
--- a/volk/spu_lib/gc_spu_macs.h
+++ /dev/null
@@ -1,380 +0,0 @@
-/* -*- asm -*- */
-/*
- * Copyright 2008 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef INCLUDED_GC_SPU_MACS_H
-#define INCLUDED_GC_SPU_MACS_H
-
-/*
- * This file contains a set of macros that are generally useful when
- * coding in SPU assembler
- *
- * Note that the multi-instruction macros in here may overwrite
- * registers 77, 78, and 79 without warning.
- */
-
-/*
- * defines for all registers
- */
-#define r0 $0
-#define r1 $1
-#define r2 $2
-#define r3 $3
-#define r4 $4
-#define r5 $5
-#define r6 $6
-#define r7 $7
-#define r8 $8
-#define r9 $9
-#define r10 $10
-#define r11 $11
-#define r12 $12
-#define r13 $13
-#define r14 $14
-#define r15 $15
-#define r16 $16
-#define r17 $17
-#define r18 $18
-#define r19 $19
-#define r20 $20
-#define r21 $21
-#define r22 $22
-#define r23 $23
-#define r24 $24
-#define r25 $25
-#define r26 $26
-#define r27 $27
-#define r28 $28
-#define r29 $29
-#define r30 $30
-#define r31 $31
-#define r32 $32
-#define r33 $33
-#define r34 $34
-#define r35 $35
-#define r36 $36
-#define r37 $37
-#define r38 $38
-#define r39 $39
-#define r40 $40
-#define r41 $41
-#define r42 $42
-#define r43 $43
-#define r44 $44
-#define r45 $45
-#define r46 $46
-#define r47 $47
-#define r48 $48
-#define r49 $49
-#define r50 $50
-#define r51 $51
-#define r52 $52
-#define r53 $53
-#define r54 $54
-#define r55 $55
-#define r56 $56
-#define r57 $57
-#define r58 $58
-#define r59 $59
-#define r60 $60
-#define r61 $61
-#define r62 $62
-#define r63 $63
-#define r64 $64
-#define r65 $65
-#define r66 $66
-#define r67 $67
-#define r68 $68
-#define r69 $69
-#define r70 $70
-#define r71 $71
-#define r72 $72
-#define r73 $73
-#define r74 $74
-#define r75 $75
-#define r76 $76
-#define r77 $77
-#define r78 $78
-#define r79 $79
-#define r80 $80
-#define r81 $81
-#define r82 $82
-#define r83 $83
-#define r84 $84
-#define r85 $85
-#define r86 $86
-#define r87 $87
-#define r88 $88
-#define r89 $89
-#define r90 $90
-#define r91 $91
-#define r92 $92
-#define r93 $93
-#define r94 $94
-#define r95 $95
-#define r96 $96
-#define r97 $97
-#define r98 $98
-#define r99 $99
-#define r100 $100
-#define r101 $101
-#define r102 $102
-#define r103 $103
-#define r104 $104
-#define r105 $105
-#define r106 $106
-#define r107 $107
-#define r108 $108
-#define r109 $109
-#define r110 $110
-#define r111 $111
-#define r112 $112
-#define r113 $113
-#define r114 $114
-#define r115 $115
-#define r116 $116
-#define r117 $117
-#define r118 $118
-#define r119 $119
-#define r120 $120
-#define r121 $121
-#define r122 $122
-#define r123 $123
-#define r124 $124
-#define r125 $125
-#define r126 $126
-#define r127 $127
-
-
-#define lr r0 // link register
-#define sp r1 // stack pointer
- // r2 is environment pointer for langs that need it (ALGOL)
-
-#define retval r3 // return values are passed in regs starting at r3
-
-#define arg1 r3 // args are passed in regs starting at r3
-#define arg2 r4
-#define arg3 r5
-#define arg4 r6
-#define arg5 r7
-#define arg6 r8
-#define arg7 r9
-#define arg8 r10
-#define arg9 r11
-#define arg10 r12
-
-// r3 - r74 are volatile (caller saves)
-// r74 - r79 are volatile (scratch regs possibly destroyed by fct prolog/epilog)
-// r80 - r127 are non-volatile (caller-saves)
-
-// scratch registers reserved for use by the macros in this file.
-
-#define _gc_t0 r79
-#define _gc_t1 r78
-#define _gc_t2 r77
-
-/*
- * ----------------------------------------------------------------
- * pseudo ops
- * ----------------------------------------------------------------
- */
-#define PROC_ENTRY(name) \
- .text; \
- .p2align 4; \
- .global name; \
- .type name, @function; \
-name:
-
-/*
- * ----------------------------------------------------------------
- * aliases for common operations
- * ----------------------------------------------------------------
- */
-
-// Move register (even pipe, 2 cycles)
-#define MR(rt, ra) or rt, ra, ra;
-
-// Move register (odd pipe, 4 cycles)
-#define LMR(rt, ra) rotqbyi rt, ra, 0;
-
-// return
-#define RETURN() bi lr;
-
-// hint for a return
-#define HINT_RETURN(ret_label) hbr ret_label, lr;
-
-// return if zero
-#define BRZ_RETURN(rt) biz rt, lr;
-
-// return if not zero
-#define BRNZ_RETURN(rt) binz rt, lr;
-
-// return if halfword zero
-#define BRHZ_RETURN(rt) bihz rt, lr;
-
-// return if halfword not zero
-#define BRHNZ_RETURN(rt) bihnz rt, lr;
-
-
-/*
- * ----------------------------------------------------------------
- * modulo like things for constant moduli that are powers of 2
- * ----------------------------------------------------------------
- */
-
-// rt = ra & (pow2 - 1)
-#define MODULO(rt, ra, pow2) \
- andi rt, ra, (pow2)-1;
-
-// rt = pow2 - (ra & (pow2 - 1))
-#define MODULO_NEG(rt, ra, pow2) \
- andi rt, ra, (pow2)-1; \
- sfi rt, rt, (pow2);
-
-// rt = ra & -(pow2)
-#define ROUND_DOWN(rt, ra, pow2) \
- andi rt, ra, -(pow2);
-
-// rt = (ra + (pow2 - 1)) & -(pow2)
-#define ROUND_UP(rt, ra, pow2) \
- ai rt, ra, (pow2)-1; \
- andi rt, rt, -(pow2);
-
-/*
- * ----------------------------------------------------------------
- * Splat - replicate a particular slot into all slots
- * Altivec analogs...
- * ----------------------------------------------------------------
- */
-
-// replicate byte from slot s [0,15]
-#define VSPLTB(rt, ra, s) \
- ilh _gc_t0, (s)*0x0101; \
- shufb rt, ra, ra, _gc_t0;
-
-// replicate halfword from slot s [0,7]
-#define VSPLTH(rt, ra, s) \
- ilh _gc_t0, 2*(s)*0x0101 + 0x0001; \
- shufb rt, ra, ra, _gc_t0;
-
-// replicate word from slot s [0,3]
-#define VSPLTW(rt, ra, s) \
- iluh _gc_t0, 4*(s)*0x0101 + 0x0001; \
- iohl _gc_t0, 4*(s)*0x0101 + 0x0203; \
- shufb rt, ra, ra, _gc_t0;
-
-// replicate double from slot s [0,1]
-#define VSPLTD(rt, ra, s) \
- /* sp is always 16-byte aligned */ \
- cdd _gc_t0, 8(sp); /* 0x10111213 14151617 00010203
04050607 */ \
- rotqbyi rt, ra, ra, (s) << 3; /* rotate double into preferred slot
*/ \
- shufb rt, rt, rt, _gc_t0;
-
-/*
- * ----------------------------------------------------------------
- * lots of min/max variations...
- *
- * On a slot by slot basis, compute the min or max
- *
- * U - unsigned, else signed
- * B,H,{} - byte, halfword, word
- * F float
- * ----------------------------------------------------------------
- */
-
-#define MIN_SELB(rt, ra, rb, rc) selb rt, ra, rb, rc;
-#define MAX_SELB(rt, ra, rb, rc) selb rt, rb, ra, rc;
-
- // words
-
-#define MIN(rt, ra, rb) \
- cgt _gc_t0, ra, rb; \
- MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define MAX(rt, ra, rb) \
- cgt _gc_t0, ra, rb; \
- MAX_SELB(rt, ra, rb, _gc_t0)
-
-#define UMIN(rt, ra, rb) \
- clgt _gc_t0, ra, rb; \
- MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define UMAX(rt, ra, rb) \
- clgt _gc_t0, ra, rb; \
- MAX_SELB(rt, ra, rb, _gc_t0)
-
- // bytes
-
-#define MINB(rt, ra, rb) \
- cgtb _gc_t0, ra, rb; \
- MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define MAXB(rt, ra, rb) \
- cgtb _gc_t0, ra, rb; \
- MAX_SELB(rt, ra, rb, _gc_t0)
-
-#define UMINB(rt, ra, rb) \
- clgtb _gc_t0, ra, rb; \
- MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define UMAXB(rt, ra, rb) \
- clgtb _gc_t0, ra, rb; \
- MAX_SELB(rt, ra, rb, _gc_t0)
-
- // halfwords
-
-#define MINH(rt, ra, rb) \
- cgth _gc_t0, ra, rb; \
- MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define MAXH(rt, ra, rb) \
- cgth _gc_t0, ra, rb; \
- MAX_SELB(rt, ra, rb, _gc_t0)
-
-#define UMINH(rt, ra, rb) \
- clgth _gc_t0, ra, rb; \
- MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define UMAXH(rt, ra, rb) \
- clgth _gc_t0, ra, rb; \
- MAX_SELB(rt, ra, rb, _gc_t0)
-
- // floats
-
-#define FMIN(rt, ra, rb) \
- fcgt _gc_t0, ra, rb; \
- MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define FMAX(rt, ra, rb) \
- fcgt _gc_t0, ra, rb; \
- MAX_SELB(rt, ra, rb, _gc_t0)
-
-// Ignoring the sign, select the values with the minimum magnitude
-#define FMINMAG(rt, ra, rb) \
- fcmgt _gc_t0, ra, rb; \
- MIN_SELB(rt, ra, rb, _gc_t0)
-
-// Ignoring the sign, select the values with the maximum magnitude
-#define FMAXMAG(rt, ra, rb) \
- fcmgt _gc_t0, ra, rb; \
- MAX_SELB(rt, ra, rb, _gc_t0)
-
-
-#endif /* INCLUDED_GC_SPU_MACS_H */
diff --git a/volk/spu_lib/spu_16s_cmpgt_unaligned.c b/volk/spu_lib/spu_16s_cmpgt_unaligned.c
deleted file mode 100644
index 8811e68..0000000
--- a/volk/spu_lib/spu_16s_cmpgt_unaligned.c
+++ /dev/null
@@ -1,160 +0,0 @@
-#include<spu_intrinsics.h>
-
-void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val,
unsigned int num_bytes){
- //loop iterator i
- int i = 0;
- void* retval = target;
-
-
- //put the target and source addresses into qwords
- vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0,
0};
- vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0};
-
- //create shuffle masks
-
- //shuffle mask building blocks:
- //all from the first vector
- vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07,
- 0x08, 0x09,
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
- //all from the second vector
- vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17,
-
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-
-
-
- //gamma: second half of the second, first half of the first, break at
(unsigned int)src%16
- vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned
int)src%16));
- vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
- vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
- vector unsigned char cmp_res = spu_or(gt_res, eq_res);
- vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
- vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
- shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned
int)src%16);
-
-
-
-
- vector unsigned char tgt_second = spu_rlqwbyte(second_oneup,
-((unsigned int)target%16));
- vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned
int)target%16));
-
- //alpha: first half of first, second half of second, break at (unsigned
int)target%16
- src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
-
- //delta: first half of first, first half of second, break at (unsigned
int)target%16
- vector unsigned char shuffle_mask_delta = spu_shuffle(oneup,
tgt_second, (vector unsigned char)shuffle_mask_alpha);
- //epsilon: second half of second, second half of first, break at
(unsigned int)target%16
- vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second,
oneup, (vector unsigned char)shuffle_mask_alpha);
- //zeta: second half of second, first half of first, break at 16 -
(unsigned int)target%16
- vector unsigned int shuffle_mask_zeta =
spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
-
- //beta: first half of first, second half of second, break at
num_bytes%16
- src_cmp = spu_splats((unsigned char)(num_bytes%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_beta = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
-
-
-
-
-
-
- qword src_past;
- qword src_present;
- qword tgt_past;
- qword tgt_present;
-
- qword in_temp;
- qword out_temp0;
- qword out_temp1;
-
- src_past = si_lqd((qword)address_counter_src, 0);
- tgt_past = si_lqd((qword)address_counter_tgt, 0);
-
- vector signed short vec_val = spu_splats(val);
- vector unsigned short compare;
- vector unsigned short ones = {1, 1, 1, 1, 1, 1, 1, 1};
- vector unsigned short after_and;
-
- for(i = 0; i < num_bytes/16; ++i) {
-
- src_present = si_lqd((qword)address_counter_src, 16);
- tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
- in_temp = spu_shuffle(src_present, src_past, (vector unsigned
char)shuffle_mask_gamma);
-
- compare = spu_cmpgt((vector signed short) in_temp, vec_val);
- after_and = spu_and(compare, ones);
-
-
- out_temp0 = spu_shuffle(tgt_past, (qword)after_and,
shuffle_mask_delta);
- out_temp1 = spu_shuffle(tgt_present, (qword)after_and,
shuffle_mask_epsilon);
-
- si_stqd(out_temp0, (qword)address_counter_tgt, 0);
- si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
- tgt_past = out_temp1;
- src_past = src_present;
- address_counter_src = spu_add(address_counter_src, 16);
- address_counter_tgt = spu_add(address_counter_tgt, 16);
-
-
- }
-
- src_present = si_lqd((qword)address_counter_src, 16);
- tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-
- in_temp = spu_shuffle(src_present, src_past,(vector unsigned char)
shuffle_mask_gamma);
-
- compare = spu_cmpgt((vector signed short) in_temp, vec_val);
- after_and = spu_and(compare, ones);
-
-
- qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned
char) shuffle_mask_zeta);
- qword meld = spu_shuffle((qword)after_and, target_temp, (vector
unsigned char)shuffle_mask_beta);
-
-
-
- out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
- out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-
- si_stqd(out_temp0, (qword)address_counter_tgt, 0);
- si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
- return retval;
-}
-
-
-
-/*
-int main(){
-
- signed short pooh[48];
- signed short bear[48];
-
- int i = 0;
- for(i = 0; i < 48; i += 2){
- bear[i] = i;
- bear[i + 1] = -i;
- }
-
- vector_gt_16bit(&pooh[0],&bear[0], 0, 48 * sizeof(signed short));
-
- for(i = 0; i < 48; ++i) {
- printf("%d, ", pooh[i]);
- }
- printf("\n");
-}
-*/
-
diff --git a/volk/spu_lib/spu_16s_vector_subtract_unaligned.c b/volk/spu_lib/spu_16s_vector_subtract_unaligned.c
deleted file mode 100644
index ea110c8..0000000
--- a/volk/spu_lib/spu_16s_vector_subtract_unaligned.c
+++ /dev/null
@@ -1,178 +0,0 @@
-#include<spu_intrinsics.h>
-
-void* libvector_16s_vector_subtract_unaligned(void* target, void* src0, void*
src1, unsigned int num_bytes){
- //loop iterator i
- int i = 0;
- void* retval = target;
-
-
- //put the target and source addresses into qwords
- vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0,
0};
- vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0
,0};
- vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0,
0};
-
- //create shuffle masks
-
- //shuffle mask building blocks:
- //all from the first vector
- vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07,
- 0x08, 0x09,
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
- //all from the second vector
- vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17,
-
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-
-
-
- //gamma: second half of the second, first half of the first, break at
(unsigned int)src0%16
- vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned
int)src0%16));
- vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
- vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
- vector unsigned char cmp_res = spu_or(gt_res, eq_res);
- vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
- vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
- shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned
int)src0%16);
-
- //eta: second half of the second, first half of the first, break at
(unsigned int)src1%16
- src_cmp = spu_splats((unsigned char)((unsigned int)src1%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- sixteen_uchar = spu_splats((unsigned char)16);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_eta = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
- shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned
int)src1%16);
-
-
-
-
-
- vector unsigned char tgt_second = spu_rlqwbyte(second_oneup,
-((unsigned int)target%16));
- vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned
int)target%16));
-
- //alpha: first half of first, second half of second, break at (unsigned
int)target%16
- src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
-
- //delta: first half of first, first half of second, break at (unsigned
int)target%16
- vector unsigned char shuffle_mask_delta = spu_shuffle(oneup,
tgt_second, (vector unsigned char)shuffle_mask_alpha);
- //epsilon: second half of second, second half of first, break at
(unsigned int)target%16
- vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second,
oneup, (vector unsigned char)shuffle_mask_alpha);
- //zeta: second half of second, first half of first, break at 16 -
(unsigned int)target%16
- vector unsigned int shuffle_mask_zeta =
spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
-
- //beta: first half of first, second half of second, break at
num_bytes%16
- src_cmp = spu_splats((unsigned char)(num_bytes%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_beta = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
-
-
-
-
-
-
- qword src0_past;
- qword src0_present;
- qword src1_past;
- qword src1_present;
- qword tgt_past;
- qword tgt_present;
-
- qword in_temp0;
- qword in_temp1;
- qword out_temp0;
- qword out_temp1;
-
- vector signed short sum;
-
- src0_past = si_lqd((qword)address_counter_src0, 0);
- src1_past = si_lqd((qword)address_counter_src1, 0);
- tgt_past = si_lqd((qword)address_counter_tgt, 0);
-
- for(i = 0; i < num_bytes/16; ++i) {
-
- src0_present = si_lqd((qword)address_counter_src0, 16);
- src1_present = si_lqd((qword)address_counter_src1, 16);
- tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
- in_temp0 = spu_shuffle(src0_present, src0_past, (vector
unsigned char)shuffle_mask_gamma);
- in_temp1 = spu_shuffle(src1_present, src1_past, (vector
unsigned char)shuffle_mask_eta);
-
- sum = spu_sub((vector signed short)in_temp0, (vector signed
short)in_temp1);
-
-
- out_temp0 = spu_shuffle(tgt_past, (qword)sum,
shuffle_mask_delta);
- out_temp1 = spu_shuffle(tgt_present, (qword)sum,
shuffle_mask_epsilon);
-
- si_stqd(out_temp0, (qword)address_counter_tgt, 0);
- si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
- tgt_past = out_temp1;
- src0_past = src0_present;
- src1_past = src1_present;
- address_counter_src0 = spu_add(address_counter_src0, 16);
- address_counter_src1 = spu_add(address_counter_src1, 16);
- address_counter_tgt = spu_add(address_counter_tgt, 16);
-
-
- }
-
- src0_present = si_lqd((qword)address_counter_src0, 16);
- src1_present = si_lqd((qword)address_counter_src1, 16);
- tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-
- in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char)
shuffle_mask_gamma);
- in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char)
shuffle_mask_eta);
- sum = spu_sub((vector signed short)in_temp0, (vector signed
short)in_temp1);
- qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned
char) shuffle_mask_zeta);
- qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned
char)shuffle_mask_beta);
-
-
-
- out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
- out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-
- si_stqd(out_temp0, (qword)address_counter_tgt, 0);
- si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
- return retval;
-}
-
-
-
-/*
-int main(){
-
- signed short pooh[48];
- signed short bear[48];
- signed short res[48];
-
- int i = 0;
- for(i = 0; i < 48; ++i){
- pooh[i] = i;
- }
- for(i = 48; i < 96; ++i){
- bear[i - 48] = i;
- }
-
- vector_subtract_16bit(res, &pooh[0], &bear[0], 48 * sizeof(signed
short));
-
- for(i = 0; i < 48; ++i) {
- printf("%d, ", res[i]);
- }
- printf("\n");
-}
-*/
-
diff --git a/volk/spu_lib/spu_16s_vector_sum_unaligned.c b/volk/spu_lib/spu_16s_vector_sum_unaligned.c
deleted file mode 100644
index 0097b4f..0000000
--- a/volk/spu_lib/spu_16s_vector_sum_unaligned.c
+++ /dev/null
@@ -1,178 +0,0 @@
-#include<spu_intrinsics.h>
-
-void* libvector_16s_vector_sum_unaligned(void* target, void* src0, void*
src1, unsigned int num_bytes){
- //loop iterator i
- int i = 0;
- void* retval = target;
-
-
- //put the target and source addresses into qwords
- vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0,
0};
- vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0
,0};
- vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0,
0};
-
- //create shuffle masks
-
- //shuffle mask building blocks:
- //all from the first vector
- vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07,
- 0x08, 0x09,
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
- //all from the second vector
- vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17,
-
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-
-
-
- //gamma: second half of the second, first half of the first, break at
(unsigned int)src0%16
- vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned
int)src0%16));
- vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
- vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
- vector unsigned char cmp_res = spu_or(gt_res, eq_res);
- vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
- vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
- shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned
int)src0%16);
-
- //eta: second half of the second, first half of the first, break at
(unsigned int)src1%16
- src_cmp = spu_splats((unsigned char)((unsigned int)src1%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- sixteen_uchar = spu_splats((unsigned char)16);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_eta = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
- shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned
int)src1%16);
-
-
-
-
-
- vector unsigned char tgt_second = spu_rlqwbyte(second_oneup,
-((unsigned int)target%16));
- vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned
int)target%16));
-
- //alpha: first half of first, second half of second, break at (unsigned
int)target%16
- src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
-
- //delta: first half of first, first half of second, break at (unsigned
int)target%16
- vector unsigned char shuffle_mask_delta = spu_shuffle(oneup,
tgt_second, (vector unsigned char)shuffle_mask_alpha);
- //epsilon: second half of second, second half of first, break at
(unsigned int)target%16
- vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second,
oneup, (vector unsigned char)shuffle_mask_alpha);
- //zeta: second half of second, first half of first, break at 16 -
(unsigned int)target%16
- vector unsigned int shuffle_mask_zeta =
spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
-
- //beta: first half of first, second half of second, break at
num_bytes%16
- src_cmp = spu_splats((unsigned char)(num_bytes%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_beta = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
-
-
-
-
-
-
- qword src0_past;
- qword src0_present;
- qword src1_past;
- qword src1_present;
- qword tgt_past;
- qword tgt_present;
-
- qword in_temp0;
- qword in_temp1;
- qword out_temp0;
- qword out_temp1;
-
- vector signed int sum;
-
- src0_past = si_lqd((qword)address_counter_src0, 0);
- src1_past = si_lqd((qword)address_counter_src1, 0);
- tgt_past = si_lqd((qword)address_counter_tgt, 0);
-
- for(i = 0; i < num_bytes/16; ++i) {
-
- src0_present = si_lqd((qword)address_counter_src0, 16);
- src1_present = si_lqd((qword)address_counter_src1, 16);
- tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
- in_temp0 = spu_shuffle(src0_present, src0_past, (vector
unsigned char)shuffle_mask_gamma);
- in_temp1 = spu_shuffle(src1_present, src1_past, (vector
unsigned char)shuffle_mask_eta);
-
- sum = spu_add((vector signed int)in_temp0, (vector signed
int)in_temp1);
-
-
- out_temp0 = spu_shuffle(tgt_past, (qword)sum,
shuffle_mask_delta);
- out_temp1 = spu_shuffle(tgt_present, (qword)sum,
shuffle_mask_epsilon);
-
- si_stqd(out_temp0, (qword)address_counter_tgt, 0);
- si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
- tgt_past = out_temp1;
- src0_past = src0_present;
- src1_past = src1_present;
- address_counter_src0 = spu_add(address_counter_src0, 16);
- address_counter_src1 = spu_add(address_counter_src1, 16);
- address_counter_tgt = spu_add(address_counter_tgt, 16);
-
-
- }
-
- src0_present = si_lqd((qword)address_counter_src0, 16);
- src1_present = si_lqd((qword)address_counter_src1, 16);
- tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-
- in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char)
shuffle_mask_gamma);
- in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char)
shuffle_mask_eta);
- sum = spu_add((vector signed int)in_temp0, (vector signed int)in_temp1);
- qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned
char) shuffle_mask_zeta);
- qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned
char)shuffle_mask_beta);
-
-
-
- out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
- out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-
- si_stqd(out_temp0, (qword)address_counter_tgt, 0);
- si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
- return retval;
-}
-
-
-
-/*
-int main(){
-
- signed short pooh[48];
- signed short bear[48];
- signed short res[48];
-
- int i = 0;
- for(i = 0; i < 48; ++i){
- pooh[i] = i;
- }
- for(i = 48; i < 96; ++i){
- bear[i - 48] = i;
- }
-
- vector_sum(&pooh[9], &pooh[9], &bear[3], 30);
-
- for(i = 0; i < 48; ++i) {
- printf("%d, ", pooh[i]);
- }
- printf("\n");
-}
-*/
-
diff --git a/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c b/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c
deleted file mode 100644
index d1c9604..0000000
--- a/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c
+++ /dev/null
@@ -1,222 +0,0 @@
-#include<spu_intrinsics.h>
-
-
-
-
-void* libvector_pointwise_multiply_32fc_unaligned(void* target, void* src0,
void* src1, unsigned int num_bytes){
- //loop iterator i
- int i = 0;
- void* retval = target;
-
-
- //put the target and source addresses into qwords
- vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0,
0};
- vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0
,0};
- vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0,
0};
-
- //create shuffle masks
-
- //shuffle mask building blocks:
- //all from the first vector
- vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07,
- 0x08, 0x09,
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
- //all from the second vector
- vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17,
-
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-
-
-
- //gamma: second half of the second, first half of the first, break at
(unsigned int)src0%16
- vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned
int)src0%16));
- vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
- vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
- vector unsigned char cmp_res = spu_or(gt_res, eq_res);
- vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
- vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
- shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned
int)src0%16);
-
- //eta: second half of the second, first half of the first, break at
(unsigned int)src1%16
- src_cmp = spu_splats((unsigned char)((unsigned int)src1%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- sixteen_uchar = spu_splats((unsigned char)16);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_eta = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
- shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned
int)src1%16);
-
-
-
-
-
- vector unsigned char tgt_second = spu_rlqwbyte(second_oneup,
-((unsigned int)target%16));
- vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned
int)target%16));
-
- //alpha: first half of first, second half of second, break at (unsigned
int)target%16
- src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
-
- //delta: first half of first, first half of second, break at (unsigned
int)target%16
- vector unsigned char shuffle_mask_delta = spu_shuffle(oneup,
tgt_second, (vector unsigned char)shuffle_mask_alpha);
- //epsilon: second half of second, second half of first, break at
(unsigned int)target%16
- vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second,
oneup, (vector unsigned char)shuffle_mask_alpha);
- //zeta: second half of second, first half of first, break at 16 -
(unsigned int)target%16
- vector unsigned int shuffle_mask_zeta =
spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
-
- //beta: first half of first, second half of second, break at
num_bytes%16
- src_cmp = spu_splats((unsigned char)(num_bytes%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_beta = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
-
-
-
-
-
-
- qword src0_past;
- qword src0_present;
- qword src1_past;
- qword src1_present;
- qword tgt_past;
- qword tgt_present;
-
- qword in_temp0;
- qword in_temp1;
- qword out_temp0;
- qword out_temp1;
-
-
- src0_past = si_lqd((qword)address_counter_src0, 0);
- src1_past = si_lqd((qword)address_counter_src1, 0);
- tgt_past = si_lqd((qword)address_counter_tgt, 0);
-
- vector unsigned char shuffle_mask_complexprod0 = {0x04, 0x05, 0x06,
0x07, 0x00, 0x01, 0x02, 0x03,
-
0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b};
- vector unsigned char shuffle_mask_complexprod1 = {0x00, 0x01, 0x02,
0x03, 0x10, 0x11, 0x12, 0x13,
-
0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b};
- vector unsigned char shuffle_mask_complexprod2 = {0x04, 0x05, 0x06,
0x07, 0x14, 0x15, 0x16, 0x17,
-
0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f};
- vector unsigned char sign_changer = {0x00, 0x00, 0x00, 0x00, 0x80,
0x00, 0x00, 0x00,
-
0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00};
-
- vector float prod0;
- qword shuf0;
- vector float prod1;
- vector float sign_change;
- qword summand0;
- qword summand1;
- vector float sum;
-
-
- for(i = 0; i < num_bytes/16; ++i) {
-
- src0_present = si_lqd((qword)address_counter_src0, 16);
- src1_present = si_lqd((qword)address_counter_src1, 16);
- tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
- in_temp0 = spu_shuffle(src0_present, src0_past, (vector
unsigned char)shuffle_mask_gamma);
- in_temp1 = spu_shuffle(src1_present, src1_past, (vector
unsigned char)shuffle_mask_eta);
-
- prod0 = spu_mul((vector float)in_temp0, (vector float)in_temp1);
- shuf0 = spu_shuffle((qword)in_temp1, (qword)in_temp1,
shuffle_mask_complexprod0);
- prod1 = spu_mul((vector float)in_temp0, (vector float)shuf0);
- sign_change = spu_xor(prod0, (vector float)sign_changer);
-
- summand0 = spu_shuffle((qword)sign_change, (qword)prod1,
shuffle_mask_complexprod1);
-
- summand1 = spu_shuffle((qword)sign_change, (qword)prod1,
shuffle_mask_complexprod2);
-
- sum = spu_add((vector float)summand0, (vector float)summand1);
-
-
- out_temp0 = spu_shuffle(tgt_past, (qword)sum,
shuffle_mask_delta);
- out_temp1 = spu_shuffle(tgt_present, (qword)sum,
shuffle_mask_epsilon);
-
- si_stqd(out_temp0, (qword)address_counter_tgt, 0);
- si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
- tgt_past = out_temp1;
- src0_past = src0_present;
- src1_past = src1_present;
- address_counter_src0 = spu_add(address_counter_src0, 16);
- address_counter_src1 = spu_add(address_counter_src1, 16);
- address_counter_tgt = spu_add(address_counter_tgt, 16);
-
-
- }
-
- src0_present = si_lqd((qword)address_counter_src0, 16);
- src1_present = si_lqd((qword)address_counter_src1, 16);
- tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-
- in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char)
shuffle_mask_gamma);
- in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char)
shuffle_mask_eta);
-
-
- prod0 = spu_mul((vector float)in_temp0, (vector float)in_temp1);
- shuf0 = spu_shuffle((qword)in_temp1, (qword)in_temp1,
shuffle_mask_complexprod0);
- prod1 = spu_mul(prod0, (vector float)shuf0);
- sign_change = spu_xor(prod0, (vector float)sign_changer);
- summand0 = spu_shuffle((qword)sign_change, (qword)prod1,
shuffle_mask_complexprod1);
- summand1 = spu_shuffle((qword)sign_change, (qword)prod1,
shuffle_mask_complexprod2);
- sum = spu_add((vector float)summand0, (vector float)summand1);
-
-
-
- qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned
char) shuffle_mask_zeta);
- qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned
char)shuffle_mask_beta);
-
-
-
- out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
- out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-
- si_stqd(out_temp0, (qword)address_counter_tgt, 0);
- si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
- return retval;
-}
-
-
-
-/*
-int main(){
-
- float pooh[48];
- float bear[48];
- float res[48];
-
- int i = 0;
- for(i = 0; i < 48; ++i){
- pooh[i] = (float) i;
- }
- for(i = 48; i < 96; ++i){
- bear[i - 48] = (float) i;
- }
-
- vector_product_complex(res, pooh, bear, 48*sizeof(float));
-
-
-
- for(i = 0; i < 48; ++i) {
- printf("%f, ", res[i]);
- }
- printf("\n");
-
-
-}
-*/
-
diff --git a/volk/spu_lib/spu_memcpy_unaligned.c
b/volk/spu_lib/spu_memcpy_unaligned.c
deleted file mode 100644
index 0f15b5d..0000000
--- a/volk/spu_lib/spu_memcpy_unaligned.c
+++ /dev/null
@@ -1,290 +0,0 @@
-#include<libvector/libvector_memcpy_unaligned.h
-#include<spu_intrinsics.h>
-
-/*
- * libvector_memcpy_unaligned
- *
- * Copies num_bytes bytes from src to target and returns target. Neither
- * pointer has to be 16-byte aligned: all memory traffic goes through whole
- * quadword loads/stores (si_lqd / si_stqd), and spu_shuffle masks splice the
- * copied bytes into the quadwords already present at the target.
- *
- * Shuffle masks built below (s = src%16, t = target%16, r = num_bytes%16):
- *   gamma   - extracts the 16 source bytes that start s bytes into "past"
- *   delta   - first t bytes of the old target quadword, then new bytes
- *   epsilon - the remaining t new bytes, then the rest of the next target quad
- *   zeta    - extracts the 16 bytes currently at the unaligned target position
- *   beta    - first r bytes from the source, the rest from the existing target
- *
- * NOTE(review): the loop and the tail both access the quadword at offset 16
- * from the current position, so a quadword beyond the end of each buffer
- * appears to be read (and, for target, written back with its old contents)
- * - confirm this is acceptable for callers.
- * NOTE(review): assumes si_lqd/si_stqd ignore the low four address bits, as
- * the SPU quadword load/store instructions do - confirm.
- */
-void* libvector_memcpy_unaligned(void* target, void* src, unsigned int
num_bytes){
- //loop iterator i
- int i = 0;
- // the original target pointer is what gets returned
- void* retval = target;
-
-
- //put the target and source addresses into qwords
- // (the address sits in element 0; the other elements are zero)
- vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0,
0};
- vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0};
-
- //create shuffle masks
-
- //shuffle mask building blocks:
- //all from the first vector
- vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07,
- 0x08, 0x09,
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
- //all from the second vector
- vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17,
-
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-
-
-
- //gamma: second half of the second, first half of the first, break at
(unsigned int)src%16
- // cmp_res marks the byte positions >= src%16; adding 16 there redirects
- // those positions to the second shuffle operand.
- vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned
int)src%16));
- vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
- vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
- vector unsigned char cmp_res = spu_or(gt_res, eq_res);
- vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
- vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
- // rotating by src%16 makes result byte 0 correspond to the first source byte
- shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned
int)src%16);
-
-
-
-
- // index vectors rotated by -(target%16); tgt_second feeds delta/epsilon.
- // NOTE(review): tgt_first is computed but not referenced again in this
- // function.
- vector unsigned char tgt_second = spu_rlqwbyte(second_oneup,
-((unsigned int)target%16));
- vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned
int)target%16));
-
- //alpha: first half of first, second half of second, break at (unsigned
int)target%16
- // (same >= comparison trick as gamma, reusing the temporaries)
- src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
-
- //delta: first half of first, first half of second, break at (unsigned
int)target%16
- vector unsigned char shuffle_mask_delta = spu_shuffle(oneup,
tgt_second, (vector unsigned char)shuffle_mask_alpha);
- //epsilon: second half of second, second half of first, break at
(unsigned int)target%16
- vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second,
oneup, (vector unsigned char)shuffle_mask_alpha);
- //zeta: second half of second, first half of first, break at 16 -
(unsigned int)target%16
- vector unsigned int shuffle_mask_zeta =
spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
-
- //beta: first half of first, second half of second, break at
num_bytes%16
- src_cmp = spu_splats((unsigned char)(num_bytes%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_beta = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
-
-
-
-
-
-
- // "past" = quadword at the current position, "present" = the one after it
- qword src_past;
- qword src_present;
- qword tgt_past;
- qword tgt_present;
-
- qword in_temp;
- qword out_temp0;
- qword out_temp1;
-
- // prime the pipeline with the first source and target quadwords
- src_past = si_lqd((qword)address_counter_src, 0);
- tgt_past = si_lqd((qword)address_counter_tgt, 0);
-
- // main loop: one pass per full 16 bytes of payload
- for(i = 0; i < num_bytes/16; ++i) {
-
- src_present = si_lqd((qword)address_counter_src, 16);
- tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
- // in_temp = the next 16 source bytes, realigned to start at byte 0
- in_temp = spu_shuffle(src_present, src_past, (vector unsigned
char)shuffle_mask_gamma);
-
- // splice in_temp across the two target quadwords it straddles
- out_temp0 = spu_shuffle(tgt_past, in_temp, shuffle_mask_delta);
- out_temp1 = spu_shuffle(tgt_present, in_temp,
shuffle_mask_epsilon);
-
- si_stqd(out_temp0, (qword)address_counter_tgt, 0);
- si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
- // carry the just-written second quadword forward and advance by 16 bytes
- tgt_past = out_temp1;
- src_past = src_present;
- address_counter_src = spu_add(address_counter_src, 16);
- address_counter_tgt = spu_add(address_counter_tgt, 16);
-
-
- }
-
- // tail: handles the remaining num_bytes%16 bytes; this code also runs when
- // the remainder is 0, in which case meld consists only of existing target
- // bytes.
- src_present = si_lqd((qword)address_counter_src, 16);
- tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-
- in_temp = spu_shuffle(src_present, src_past,(vector unsigned char)
shuffle_mask_gamma);
- // target_temp = the 16 bytes currently stored at the target position
- qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned
char) shuffle_mask_zeta);
- // meld = new bytes up to num_bytes%16, existing target bytes after that
- qword meld = spu_shuffle(in_temp, target_temp, (vector unsigned
char)shuffle_mask_beta);
-
-
-
- out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
- out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-
- si_stqd(out_temp0, (qword)address_counter_tgt, 0);
- si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
- return retval;
-}
-
-
-
-/*
-void* mcpy(void* target, void* src, size_t num_bytes){
- //loop iterator i
- int i = 0;
- void* retval = src;
-
- //put the target and source addresses into qwords
- vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0,
0};
- vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0};
-
- //create shuffle masks
-
- //shuffle mask building blocks:
- //all from the first vector
- vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07,
- 0x08, 0x09,
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
- //all from the second vector
- vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17,
-
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-
-
-
- //gamma: second half of the second, first half of the first, break at
src%16
- vector unsigned char src_cmp = spu_splats((unsigned char)(src%16));
- vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
- vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
- vector unsigned char cmp_res = spu_or(gt_res, eq_res);
- vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
- vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
- shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, src%16);
-
-
-
-
- vector unsigned char tgt_second = spu_rlqwbyte(second_oneup,
-(target%16));
- vector unsigned char tgt_first = spu_rlqwbyte(oneup, -(target%16));
-
- //alpha: first half of first, second half of second, break at target%16
- src_cmp = spu_splats((unsigned char)(target%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
-
- //delta: first half of first, first half of second, break at target%16
- vector unsigned char shuffle_mask_delta = spu_shuffle(oneup,
tgt_second, (vector unsigned char)shuffle_mask_alpha);
- //epsilon: second half of second, second half of first, break at
target%16
- vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second,
oneup, (vector unsigned char)shuffle_mask_alpha);
- //zeta: second half of second, first half of first, break at 16 -
target%16
- vector unsigned int shuffle_mask_zeta =
spu_rlqwbyte(shuffle_mask_alpha, target%16);
-
- //beta: first half of first, second half of second, break at
num_bytes%16
- src_cmp = spu_splats((unsigned char)(num_bytes%16));
- gt_res = spu_cmpgt(oneup, src_cmp);
- eq_res = spu_cmpeq(oneup, src_cmp);
- cmp_res = spu_or(gt_res, eq_res);
- phase_change = spu_and(sixteen_uchar, cmp_res);
- vector unsigned int shuffle_mask_beta = spu_add((vector unsigned
int)phase_change,
-
(vector unsigned int)oneup);
-
-
- printf("num_bytesmod16 %d\n", num_bytes%16);
- printf("beta %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d,
%d, %d\n",
- spu_extract((vector unsigned char) shuffle_mask_beta, 0),
- spu_extract((vector unsigned char) shuffle_mask_beta, 1),
- spu_extract((vector unsigned char) shuffle_mask_beta, 2),
- spu_extract((vector unsigned char) shuffle_mask_beta, 3),
- spu_extract((vector unsigned char) shuffle_mask_beta, 4),
- spu_extract((vector unsigned char) shuffle_mask_beta, 5),
- spu_extract((vector unsigned char) shuffle_mask_beta, 6),
- spu_extract((vector unsigned char) shuffle_mask_beta, 7),
- spu_extract((vector unsigned char) shuffle_mask_beta, 8),
- spu_extract((vector unsigned char) shuffle_mask_beta, 9),
- spu_extract((vector unsigned char) shuffle_mask_beta, 10),
- spu_extract((vector unsigned char) shuffle_mask_beta, 11),
- spu_extract((vector unsigned char) shuffle_mask_beta, 12),
- spu_extract((vector unsigned char) shuffle_mask_beta, 13),
- spu_extract((vector unsigned char) shuffle_mask_beta, 14),
- spu_extract((vector unsigned char) shuffle_mask_beta, 15));
-
-
-
-
-
-
-
- qword src_past;
- qword src_present;
- qword tgt_past;
- qword tgt_present;
-
- qword in_temp;
- qword out_temp0;
- qword out_temp1;
-
- src_past = si_lqd((qword)address_counter_src, 0);
- tgt_past = si_lqd((qword)address_counter_tgt, 0);
-
- for(i = 0; i < num_bytes/16; ++i) {
-
- src_present = si_lqd((qword)address_counter_src, 16);
- tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
- in_temp = spu_shuffle(src_present, src_past, (vector unsigned
char)shuffle_mask_gamma);
-
- out_temp0 = spu_shuffle(tgt_past, in_temp, shuffle_mask_delta);
- out_temp1 = spu_shuffle(tgt_present, in_temp,
shuffle_mask_epsilon);
-
- si_stqd(out_temp0, (qword)address_counter_tgt, 0);
- si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
- tgt_past = out_temp1;
- src_past = src_present;
- address_counter_src = spu_add(address_counter_src, 16);
- address_counter_tgt = spu_add(address_counter_tgt, 16);
-
-
- }
-
- src_present = si_lqd((qword)address_counter_src, 16);
- tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-
- in_temp = spu_shuffle(src_present, src_past,(vector unsigned char)
shuffle_mask_gamma);
- qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned
char) shuffle_mask_zeta);
- qword meld = spu_shuffle(in_temp, target_temp, (vector unsigned
char)shuffle_mask_beta);
-
-
-
- out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
- out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-
- si_stqd(out_temp0, (qword)address_counter_tgt, 0);
- si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
- return retval;
-
-}
-*/
-/*
-int main(){
-
- unsigned char pooh[48];
- unsigned char bear[48];
-
- int i = 0;
- for(i = 0; i < 48; ++i){
- pooh[i] = i;
- bear[i] = i;
- }
-
- spu_mcpy(&pooh[9],&bear[3], 15);
-
- for(i = 0; i < 48; ++i) {
- printf("%d, ", pooh[i]);
- }
- printf("\n");
-}
-
-*/
diff --git a/volk/spu_lib/spu_memset_unaligned.S
b/volk/spu_lib/spu_memset_unaligned.S
deleted file mode 100644
index c260a12..0000000
--- a/volk/spu_lib/spu_memset_unaligned.S
+++ /dev/null
@@ -1,185 +0,0 @@
-/* -*- asm -*- */
-/*
- * Copyright 2008 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include "gc_spu_macs.h"
-
- .file "spu_memset_unaligned.S"
-
- /*
- * Computes this, only a lot faster...
- *
- * void *
- * libvector_memset_unaligned(void *pv, int c, size_t n)
- * {
- * unsigned char *p = (unsigned char *) pv;
- * size_t i;
- * for (i = 0; i < n; i++)
- * p[i] = c;
- *
- * return pv;
- * }
- */
-
-// Argument aliases. arg1..arg3 are presumably provided by gc_spu_macs.h
-// (its definitions are not visible here - confirm).
-#define p_arg arg1 // we're going to clobber arg1 w/ the return
value
-#define c arg2 // the constant we're writing
-#define n arg3 // how many bytes to write
-
-// Working registers used by the routine below.
-#define p r13 // where we're writing
-#define t0 r14
-#define t1 r15
-#define mask r16
-#define old r17
-#define an r18 // aligned n (n rounded down to mod 16 boundary)
-#define next_p r19
-#define cond1 r20
-#define cond2 r21
-#define m r22
-#define r r23
-
- // Flow: optional do_head (unaligned leading bytes) -> middle_loop
- // (128 bytes per pass) -> middle2_loop (16 bytes per pass) -> do_tail
- // (final partial quadword). Partial quadwords are written with a
- // load / selb-merge / store sequence so neighbouring bytes are kept.
- PROC_ENTRY(libvector_memset_unaligned)
-
- // Hint the return from do_head, in case we go that way.
- // There's pretty much nothing we can do to hint the branch to it.
- hbrr do_head_br, head_complete
-
- MR(p, p_arg) // leaves p, the return value, in the correct reg (r3)
- BRZ_RETURN(n)
-
- MODULO(t0, p, 16) // is p%16 == 0?
- VSPLTB(c, c, 3) // splat byte in preferred slot of c into all
slots
- brnz t0, do_head // no, handle it
-head_complete:
-
- /*
- * preconditions:
- * p%16 == 0, n > 0
- */
- hbrr middle_loop_br, middle_loop
-
- ROUND_DOWN(an, n, 16) // an is "aligned n"
- MODULO(n, n, 16) // what's left over in the last quad
- brz an, do_tail // no whole quad words; skip to tail
- clgti t0, an, 127 // an >= 128?
- brz t0, middle2 // nope, go handle the cases between 0 and 112
-
- /*
- * 128 bytes / iteration
- *
- * Eight quadword stores per pass. an is reduced first, so cond1
- * tests whether another full 128-byte pass remains. The eighth
- * store is addressed from next_p with displacement 7*16-128 (-16).
- *
- * NOTE(review): cgti below is a signed compare while the entry test
- * above uses the unsigned clgti; the two agree only while an stays
- * within the positive signed range - confirm that is guaranteed.
- */
- .p2align 4
-middle_loop:
- ai an, an, -128
- stqd c, 0*16(p)
- ai next_p, p, 128
- stqd c, 1*16(p)
- cgti cond1, an, 127
- stqd c, 2*16(p)
-
- stqd c, 3*16(p)
- stqd c, 4*16(p)
- stqd c, 5*16(p)
- stqd c, 6*16(p)
-
- MR(p, next_p)
- stqd c, 7*16-128(next_p)
- or cond2, n, an // cond2 == 0 only when both n and an are zero
-middle_loop_br:
- brnz cond1, middle_loop
-
- /*
- * if an and n are both zero, return now
- */
- BRZ_RETURN(cond2)
-
- /*
- * otherwise handle last of full quad words
- *
- * 0 <= an < 128, p%16 == 0
- */
-middle2:
- /*
- * if an == 0, go handle the final non-full quadword
- */
- brz an, do_tail
- hbrr middle2_loop_br, middle2_loop
-
- // one quadword store per pass until an reaches zero
- .p2align 3
-middle2_loop:
- ai next_p, p, 16
- stqd c, 0(p)
- ai an, an, -16
- LMR(p, next_p)
-middle2_loop_br:
- brnz an, middle2_loop
-
- /* We're done with the full quadwords. */
-
- /*
- * Handle the final partial quadword.
- * We'll be modifying only the left hand portion of the quad.
- *
- * preconditions:
- * an == 0, 0 <= n < 16, p%16 == 0
- *
- * The all-ones mask is shifted left by (16 - n) bytes and then used
- * by selb to merge c into the quadword loaded from p.
- * NOTE(review): selb is taken to pick c where mask bits are 1 and
- * old where they are 0 - confirm against the SPU ISA.
- */
-do_tail:
- HINT_RETURN(do_tail_ret)
- il mask, -1
- sfi t1, n, 16 // t1 = 16 - n
- lqd old, 0(p)
- shlqby mask, mask, t1
- selb t0, old, c, mask
- stqd t0, 0(p)
-do_tail_ret:
- RETURN()
-
- /*
- * ----------------------------------------------------------------
- * Handle the first partial quadword
- *
- * preconditions:
- * p%16 != 0
- *
- * postconditions:
- * p%16 == 0 or n == 0
- *
- * |-- m --|
- * +----------------+----------------+
- * | //////// | |
- * +----------------+----------------+
- * |----- r -----|
- * p
- * ----------------------------------------------------------------
- */
- // r = bytes from p to the end of its quadword, m = min(r, n) bytes to
- // fill here (per the MODULO_NEG / UMIN macro names - definitions not
- // visible, confirm). t1 keeps the original p for the store because p
- // is advanced by m before the stqd.
-do_head:
- lqd old, 0(p)
- MODULO_NEG(r, p, 16)
- il mask, -1
- UMIN(m, r, n)
- shlqby mask, mask, m // 1's in the top, m*8 0's in the bottom
- MR(t1, p)
- sf t0, m, r // t0 = r - m
- a p, p, m // p += m
- rotqby mask, mask, t0 // rotate 0's to the right place
- sf n, m, n // n -= m
- selb t0, c, old, mask // merge
- stqd t0, 0(t1)
- BRZ_RETURN(n)
-do_head_br:
- br head_complete
- [Commit-gnuradio] [gnuradio] branch master updated (300f5fb -> 19d111e), git, 2014/01/28
- [Commit-gnuradio] [gnuradio] 01/06: gr-uhd: add option to uhd_rx_cfile to write metadata file., git, 2014/01/28
- [Commit-gnuradio] [gnuradio] 02/06: Merge branch 'maint', git, 2014/01/28
- [Commit-gnuradio] [gnuradio] 06/06: volk: remove unused spu_lib directory from VOLK.,
git <=
- [Commit-gnuradio] [gnuradio] 05/06: cmake: fixed ICE 3.4 version check for GCC < 4.7., git, 2014/01/28
- [Commit-gnuradio] [gnuradio] 03/06: volk: follow-up to commit cb4142d45 for added volk_get_machine function on master., git, 2014/01/28
- [Commit-gnuradio] [gnuradio] 04/06: Merge remote-tracking branch 'stiabhan/uhd_rx_cfile', git, 2014/01/28