
Les langages de programmation sont mis face à face sur plusieurs terrains, dont l’allocation (et la désallocation) d’arbres binaires et la visualisation d’ensembles de Mandelbrot, sur un Intel i5-3330 à quatre cœurs cadencé à 3 gigahertz et doté de près de 16 Go de mémoire vive. Pour un couple donné de langages, les benchmarks ne dévoilent des chiffres comparatifs que pour les versions les plus rapides des programmes. Les codes sources de ces programmes sont reproduits ci-dessous pour le couple Rust/C, dans le cas du benchmark Mandelbrot :
Code Rust : | Sélectionner tout |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 | // The Computer Language Benchmarks Game // https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ // // contributed by Matt Watson // contributed by TeXitoi // contributed by Volodymyr M. Lisivka // contributed by Michael Cicotti extern crate generic_array; extern crate num_traits; extern crate numeric_array; extern crate rayon; use generic_array::typenum::consts::U8; use numeric_array::NumericArray as Arr; use rayon::prelude::*; use std::io::Write; // [f64;8] type Vecf64 = Arr<f64, U8>; type Constf64 = numeric_array::NumericConstant<f64>; const MAX_ITER: usize = 50; const VLEN: usize = 8; #[inline(always)] pub fn mbrot8(out: &mut u8, cr: Vecf64, ci: Constf64) { let mut zr = Arr::splat(0f64); let mut zi = Arr::splat(0f64); let mut tr = Arr::splat(0f64); let mut ti = Arr::splat(0f64); let mut absz = Arr::splat(0f64); for _ in 0..MAX_ITER / 5 { for _ in 0..5 { zi = (zr + zr) * zi + ci; zr = tr - ti + cr; tr = zr * zr; ti = zi * zi; } absz = tr + ti; if absz.iter().all(|&t| t > 4.) { return; } } *out = absz.iter().enumerate().fold(0, |accu, (i, &t)| { accu | if t <= 4. { 0x80 >> i } else { 0 } }); } fn main() { let size = std::env::args() .nth(1) .and_then(|n| n.parse().ok()) .unwrap_or(200); // Round size to multiple of 8 let size = size / VLEN * VLEN; let inv = 2. 
/ size as f64; let mut xloc = vec![Arr::splat(0f64); size / VLEN]; for i in 0..size { xloc[i / VLEN][i % VLEN] = i as f64 * inv - 1.5; } let stdout_unlocked = std::io::stdout(); // Main thread only can print to stdout let mut stdout = stdout_unlocked.lock(); println!("P4\n{} {}", size, size); let mut rows = vec![0; size * size / VLEN]; rows.par_chunks_mut(size / VLEN) .enumerate() .for_each(|(y, out)| { let ci = numeric_array::NumericConstant(y as f64 * inv - 1.); out.iter_mut() .enumerate() .for_each(|(i, inner_out)| mbrot8(inner_out, xloc[i], ci)); }); let _ = stdout.write_all(&rows); } |
Code C : | Sélectionner tout |
// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// Contributed by Kevin Miller
//
// ver 2: added a couple of optimizations
// - Reduced number of times a vector of 8 was checked to see if
//    they had all escaped, similar to Dominic Letz's C #5 entry.
// - Processed 64 pixels at a time if width was a multiple of 64,
//    thereby reducing writes to the bitmap.
//
// compile with following gcc flags
//  -pipe -Wall -O3 -ffast-math -fno-finite-math-only -march=native -mfpmath=sse -msse3 -fopenmp

#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <emmintrin.h>

// Returns the number of decimal digits of n (0 when n is 0).
long numDigits(long n)
{
    long len = 0;
    while(n)
    {
        n=n/10;
        len++;
    }
    return len;
}

// Returns -1 when none of the 8 doubles held in v[0..3] is <= f, 0 otherwise.
// static: a plain C99 `inline` definition emits no external symbol, so the
// program fails to link whenever the compiler declines to inline the call.
static inline long vec_nle(__m128d *v, double f)
{
    return (v[0][0] <= f ||
        v[0][1] <= f ||
        v[1][0] <= f ||
        v[1][1] <= f ||
        v[2][0] <= f ||
        v[2][1] <= f ||
        v[3][0] <= f ||
        v[3][1] <= f) ? 0 : -1;
}

// Clears in *pix8 the bit of every lane of v[0..3] that is not <= f.
// Lane 0 maps to bit 0x80, lane 7 to bit 0x01.
static inline void clrPixels_nle(__m128d *v, double f, unsigned long * pix8)
{
    if(!(v[0][0] <= f)) *pix8 &= 0x7f;
    if(!(v[0][1] <= f)) *pix8 &= 0xbf;
    if(!(v[1][0] <= f)) *pix8 &= 0xdf;
    if(!(v[1][1] <= f)) *pix8 &= 0xef;
    if(!(v[2][0] <= f)) *pix8 &= 0xf7;
    if(!(v[2][1] <= f)) *pix8 &= 0xfb;
    if(!(v[3][0] <= f)) *pix8 &= 0xfd;
    if(!(v[3][1] <= f)) *pix8 &= 0xfe;
}

// Performs one iteration on 8 points held as 4 pairs: stores the squared
// magnitude of the current values in sum, then updates r and i in place.
static inline void calcSum(__m128d *r, __m128d *i, __m128d *sum, __m128d *init_r, __m128d init_i)
{
    for(long pair=0; pair<4; pair++)
    {
        __m128d r2 = r[pair] * r[pair];
        __m128d i2 = i[pair] * i[pair];
        __m128d ri = r[pair] * i[pair];

        sum[pair] = r2 + i2;

        r[pair]=r2 - i2 + init_r[pair];
        i[pair]=ri + ri + init_i;
    }
}

// Computes one output byte for the 8 points whose real parts are
// init_r[0..3] and whose shared imaginary part is init_i. A bit stays set
// when that point's squared magnitude is still <= 4.0 at the final check.
static inline unsigned long mand8(__m128d *init_r, __m128d init_i)
{
    __m128d r[4], i[4], sum[4];
    for(long pair=0; pair<4; pair++)
    {
        r[pair]=init_r[pair];
        i[pair]=init_i;
    }

    unsigned long pix8 = 0xff;

    // 6 batches of 8 iterations; the all-escaped test runs once per batch.
    for (long j = 0; j < 6; j++)
    {
        for(long k=0; k<8; k++)
            calcSum(r, i, sum, init_r, init_i);

        if (vec_nle(sum, 4.0))
        {
            pix8 = 0x00;
            break;
        }
    }
    if (pix8)
    {
        // two more iterations, then test each point individually
        calcSum(r, i, sum, init_r, init_i);
        calcSum(r, i, sum, init_r, init_i);
        clrPixels_nle(sum, 4.0, &pix8);
    }

    return pix8;
}

// Computes 8 consecutive output bytes (64 pixels) starting at init_r and
// packs them into one word, first byte in the least significant position.
// The shift by 56 relies on unsigned long being 64 bits wide.
unsigned long mand64(__m128d *init_r, __m128d init_i)
{
    unsigned long pix64 = 0;

    for(long byte=0; byte<8; byte++)
    {
        unsigned long pix8 = mand8(init_r, init_i);

        pix64 = (pix64 >> 8) | (pix8 << 56);
        init_r += 4;
    }

    return pix64;
}

// Renders a wid_ht x wid_ht bitmap to stdout as a P4 header followed by the
// packed pixels. wid_ht is argv[1] (16000 by default), rounded up to a
// multiple of 8. Returns EXIT_FAILURE on an invalid size, a failed
// allocation or a failed write.
int main(int argc, char ** argv)
{
    // get width/height from arguments

    long wid_ht = 16000;
    if (argc >= 2)
    {
        wid_ht = atoi(argv[1]);
        // A size of 0 makes numDigits() return 0, so the header would
        // overrun the buffer; a negative size gives the arrays below a
        // negative length. Reject both.
        if (wid_ht < 1)
        {
            fprintf(stderr, "usage: %s [size >= 1]\n", argv[0]);
            return EXIT_FAILURE;
        }
    }
    wid_ht = (wid_ht+7) & ~7;

    // allocate memory for header and pixels

    long headerLength = numDigits(wid_ht)*2+5;
    long pad = ((headerLength + 7) & ~7) - headerLength; // pad aligns pixels on 8
    long dataLength = headerLength + wid_ht*(wid_ht>>3);
    unsigned char * const buffer = malloc(pad + dataLength);
    if (buffer == NULL)
    {
        perror("malloc");
        return EXIT_FAILURE;
    }
    unsigned char * const header = buffer + pad;
    unsigned char * const pixels = header + headerLength;

    // generate the bitmap header
    // (sprintf's terminating NUL lands on pixels[0], which is overwritten below)

    sprintf((char *)header, "P4\n%ld %ld\n", wid_ht, wid_ht);

    // calculate initial values, store in r0, i0
    // NOTE(review): both arrays are variable-length and live on the stack,
    // so a very large size can exhaust it.

    __m128d r0[wid_ht/2];
    double i0[wid_ht];

    for(long xy=0; xy<wid_ht; xy+=2)
    {
        r0[xy>>1] = 2.0 / wid_ht * (__m128d){xy, xy+1} - 1.5;
        i0[xy]    = 2.0 / wid_ht *  xy    - 1.0;
        i0[xy+1]  = 2.0 / wid_ht * (xy+1) - 1.0;
    }

    // generate the bitmap

    long use8 = wid_ht%64;
    if (use8)
    {
        // process 8 pixels (one byte) at a time
        #pragma omp parallel for schedule(guided)
        for(long y=0; y<wid_ht; y++)
        {
            __m128d init_i = (__m128d){i0[y], i0[y]};
            long rowstart = y*wid_ht/8;
            for(long x=0; x<wid_ht; x+=8)
            {
                pixels[rowstart + x/8] = mand8(r0+x/2, init_i);
            }
        }
    }
    else
    {
        // process 64 pixels (8 bytes) at a time
        #pragma omp parallel for schedule(guided)
        for(long y=0; y<wid_ht; y++)
        {
            __m128d init_i = (__m128d){i0[y], i0[y]};
            long rowstart = y*wid_ht/64;
            for(long x=0; x<wid_ht; x+=64)
            {
                ((unsigned long *)pixels)[rowstart + x/64] = mand64(r0+x/2, init_i);
            }
        }
    }

    // write the data
    // write() may transfer fewer bytes than requested or be interrupted by a
    // signal, so keep going until the whole image is out or an error occurs.

    int status = EXIT_SUCCESS;
    const unsigned char *cursor = header;
    long remaining = dataLength;
    while (remaining > 0)
    {
        ssize_t written = write(STDOUT_FILENO, cursor, (size_t)remaining);
        if (written < 0)
        {
            if (errno == EINTR)
                continue;
            perror("write");
            status = EXIT_FAILURE;
            break;
        }
        cursor += written;
        remaining -= written;
    }

    free(buffer);

    return status;
}
Les chiffres :
L’intérêt grandissant d’acteurs de la filière programmation système s’explique par ce type de constats. Au troisième trimestre de l’année 2019, on évoquait déjà la possible entrée, au sein du noyau Linux, d’un framework destiné à l’écriture de pilotes en langage de programmation...
La fin de cet article est réservée aux abonnés. Soutenez le Club Developpez.com en prenant un abonnement pour que nous puissions continuer à vous proposer des publications.