source: icXML/icXML-devel/src/icxmlc/XMLBlockCopy.h @ 2720

Last change on this file since 2720 was 2720, checked in by cameron, 6 years ago

Initial check-in of icXML 0.8 source files

File size: 3.9 KB
Line 
1/*
2 *  Copyright © 2012 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icXML is a trademark of International Characters.
5 */
6
7/*
8 * @author Nigel Medforth, nigelm -at- international-characters.com
9 * @version $Id: XMLBlockCopy.h 207 2012-12-02 20:38:22Z robc $
10 *
11 */
12
13#ifndef XML_BLOCK_COPY
14#define XML_BLOCK_COPY
15
16#include <simd-lib/bitblock.hpp>
17
18/**
19
20  The following functions let us force the compiler to properly unroll and organize the SIMD
21  loads and stores so that block-at-a-time copying is efficient. They all assume that N
22  bitblocks/bytepacks are copied per call and that N is known at compile time. Do not use them
23  unless both of those assumptions are guaranteed to hold.
24
25 **/
26
27template<unsigned int blocks>
28IDISA_ALWAYS_INLINE
29void
30copy_unaligned(const BitBlock * const input, BitBlock * const output)
31{
32        copy_unaligned<1>(input, output);
33        copy_unaligned<blocks - 1>(input + 1, output + 1);
34}
35
36template<>
37IDISA_ALWAYS_INLINE
38void
39copy_unaligned<1>(const BitBlock * const input, BitBlock * const output)
40{
41        bitblock::store_unaligned(bitblock::load_unaligned(input), output);
42}
43
44template<>
45IDISA_ALWAYS_INLINE
46void
47copy_unaligned<0>(const BitBlock * const, BitBlock * const) { /* do nothing */ }
48
49/* ----------------------------------------------------------------------------------------------- */
50
51template<unsigned int blocks>
52IDISA_ALWAYS_INLINE
53void
54copy_aligned(const BitBlock * const input, BitBlock * const output)
55{
56        copy_aligned<1>(input, output);
57        copy_aligned<blocks - 1>(input + 1, output + 1);
58}
59
60template<>
61IDISA_ALWAYS_INLINE
62void
63copy_aligned<1>(const BitBlock * const input, BitBlock * const output)
64{
65        bitblock::store_aligned(bitblock::load_aligned(input), output);
66}
67
68template<>
69IDISA_ALWAYS_INLINE
70void
71copy_aligned<0>(const BitBlock * const, BitBlock * const) { /* do nothing */ }
72
73/* ----------------------------------------------------------------------------------------------- */
74
75template<unsigned int blocks>
76IDISA_ALWAYS_INLINE
77void
78copy_unaligned_to_aligned(const BitBlock * const input, BitBlock * const output)
79{
80        copy_unaligned_to_aligned<1>(input, output);
81        copy_unaligned_to_aligned<blocks - 1>(input + 1, output + 1);
82}
83
84template<>
85IDISA_ALWAYS_INLINE
86void
87copy_unaligned_to_aligned<1>(const BitBlock * const input, BitBlock * const output)
88{
89        bitblock::store_aligned(bitblock::load_unaligned(input), output);
90}
91
92template<>
93IDISA_ALWAYS_INLINE
94void
95copy_unaligned_to_aligned<0>(const BitBlock * const, BitBlock * const) { /* do nothing */ }
96
97/* ----------------------------------------------------------------------------------------------- */
98
99template<unsigned int blocks>
100IDISA_ALWAYS_INLINE
101void
102clear_aligned_blocks(BitBlock * stream)
103{
104        clear_aligned_blocks<1>(stream);
105        clear_aligned_blocks<blocks - 1>(stream + 1);
106}
107
108template<>
109IDISA_ALWAYS_INLINE
110void
111clear_aligned_blocks<1>(BitBlock * stream)
112{
113        const BitBlock z = simd<8>::constant<0>();
114        stream[0] = z;
115}
116
117template<>
118IDISA_ALWAYS_INLINE
119void
120clear_aligned_blocks<0>(BitBlock *) { /* do nothing */ }
121
122/* ----------------------------------------------------------------------------------------------- */
123#if 0
124#define L1_CACHE_LINE_SIZE (32)
125
126template<unsigned int blocks = 1>
127IDISA_ALWAYS_INLINE
128void
129prefetch(const BitBlock * const prefetch_stream)
130{
131        enum
132        {
133                BIT_BLOCKS_PER_L1_CACHE_LINE =
134                        L1_CACHE_LINE_SIZE > sizeof(BitBlock)
135                        ? L1_CACHE_LINE_SIZE / sizeof(BitBlock)
136                        : 1
137        };
138        prefetch<1>(prefetch_stream);
139        prefetch<blocks - BIT_BLOCKS_PER_L1_CACHE_LINE>(&prefetch_stream[BIT_BLOCKS_PER_L1_CACHE_LINE]);
140}
141
142template<>
143IDISA_ALWAYS_INLINE
144void
145prefetch<1>(const BitBlock * const prefetch_stream)
146{
147        #ifdef __GNUC__
148                __builtin_prefetch((void*)(prefetch_stream));
149        #elif defined _MSC_VER
150                _mm_prefetch((char*)(prefetch_stream), _MM_HINT_NTA);
151        #endif
152}
153
154template<>
155IDISA_ALWAYS_INLINE
156void
157prefetch<0>(const BitBlock * const prefetch_stream) { /* do nothing */ }
158#endif
159
160#endif
Note: See TracBrowser for help on using the repository browser.