| [0aec496] | 1 | #include "filecache.hfa"
 | 
|---|
 | 2 | 
 | 
|---|
 | 3 | #include <assert.h>
 | 
|---|
 | 4 | #include <string.h>
 | 
|---|
 | 5 | 
 | 
|---|
| [8c43d05] | 6 | #include <fstream.hfa>
 | 
|---|
| [0aec496] | 7 | #include <stdlib.hfa>
 | 
|---|
 | 8 | 
 | 
|---|
 | 9 | #include <errno.h>
 | 
|---|
 | 10 | #include <unistd.h>
 | 
|---|
 | 11 | extern "C" {
 | 
|---|
 | 12 |         #include <fcntl.h>
 | 
|---|
 | 13 |         #include <ftw.h>
 | 
|---|
 | 14 | }
 | 
|---|
 | 15 | 
 | 
|---|
 | 16 | #include "options.hfa"
 | 
|---|
 | 17 | 
 | 
|---|
 | 18 | static inline uint32_t murmur_32_scramble(uint32_t k) {
 | 
|---|
 | 19 |     k *= 0xcc9e2d51;
 | 
|---|
 | 20 |     k = (k << 15) | (k >> 17);
 | 
|---|
 | 21 |     k *= 0x1b873593;
 | 
|---|
 | 22 |     return k;
 | 
|---|
 | 23 | }
 | 
|---|
 | 24 | 
 | 
|---|
 | 25 | uint32_t murmur3_32(const uint8_t* key, size_t len, uint32_t seed)
 | 
|---|
 | 26 | {
 | 
|---|
 | 27 |         uint32_t h = seed;
 | 
|---|
 | 28 |         uint32_t k;
 | 
|---|
 | 29 |         /* Read in groups of 4. */
 | 
|---|
 | 30 |         for (size_t i = len >> 2; i; i--) {
 | 
|---|
 | 31 |                 // Here is a source of differing results across endiannesses.
 | 
|---|
 | 32 |                 // A swap here has no effects on hash properties though.
 | 
|---|
 | 33 |                 memcpy(&k, key, sizeof(uint32_t));
 | 
|---|
 | 34 |                 key += sizeof(uint32_t);
 | 
|---|
 | 35 |                 h ^= murmur_32_scramble(k);
 | 
|---|
 | 36 |                 h = (h << 13) | (h >> 19);
 | 
|---|
 | 37 |                 h = h * 5 + 0xe6546b64;
 | 
|---|
 | 38 |         }
 | 
|---|
 | 39 |         /* Read the rest. */
 | 
|---|
 | 40 |         k = 0;
 | 
|---|
 | 41 |         for (size_t i = len & 3; i; i--) {
 | 
|---|
 | 42 |                 k <<= 8;
 | 
|---|
 | 43 |                 k |= key[i - 1];
 | 
|---|
 | 44 |         }
 | 
|---|
 | 45 |         // A swap is *not* necessary here because the preceding loop already
 | 
|---|
 | 46 |         // places the low bytes in the low places according to whatever endianness
 | 
|---|
 | 47 |         // we use. Swaps only apply when the memory is copied in a chunk.
 | 
|---|
 | 48 |         h ^= murmur_32_scramble(k);
 | 
|---|
 | 49 |         /* Finalize. */
 | 
|---|
 | 50 |         h ^= len;
 | 
|---|
 | 51 |         h ^= h >> 16;
 | 
|---|
 | 52 |         h *= 0x85ebca6b;
 | 
|---|
 | 53 |         h ^= h >> 13;
 | 
|---|
 | 54 |         h *= 0xc2b2ae35;
 | 
|---|
 | 55 |         h ^= h >> 16;
 | 
|---|
 | 56 |         return h;
 | 
|---|
 | 57 | }
 | 
|---|
 | 58 | 
 | 
|---|
| [53e4562] | 59 | static inline [unsigned size, char unit] human_size( size_t size ) {
 | 
|---|
 | 60 |         int idx = 0;
 | 
|---|
 | 61 |         static char units [] = { ' ', 'K', 'M', 'G', 'T' };
 | 
|---|
 | 62 |         while( size >= 1024 ) {
 | 
|---|
 | 63 |                 idx++;
 | 
|---|
 | 64 |                 size /= 1024;
 | 
|---|
 | 65 |                 if(idx >= 5) {
 | 
|---|
 | 66 |                         abort("File too large to print\n");
 | 
|---|
 | 67 |                 }
 | 
|---|
 | 68 |         }
 | 
|---|
 | 69 | 
 | 
|---|
 | 70 |         return [size, units[idx]];
 | 
|---|
 | 71 | }
 | 
|---|
| [0aec496] | 72 | 
 | 
|---|
 | 73 | struct {
 | 
|---|
 | 74 |         cache_line * entries;
 | 
|---|
 | 75 |         size_t size;
 | 
|---|
| [d11d6eb] | 76 |         int * rawfds;
 | 
|---|
 | 77 |         int nfds;
 | 
|---|
| [0aec496] | 78 | } file_cache;
 | 
|---|
 | 79 | 
 | 
|---|
 | 80 | void ?{}( cache_line & this ) with( this ) {
 | 
|---|
 | 81 |         file = 0p;
 | 
|---|
 | 82 |         size = 0;
 | 
|---|
 | 83 |         fd   = 0;
 | 
|---|
 | 84 | }
 | 
|---|
 | 85 | 
 | 
|---|
 | 86 | [int fd, size_t size] get_file( * const char file, size_t len ) {
 | 
|---|
| [2ecbd7b] | 87 |         uint32_t idx = murmur3_32( (const uint8_t *)file, len, options.file_cache.hash_seed ) % file_cache.size;
 | 
|---|
| [0aec496] | 88 | 
 | 
|---|
 | 89 |         for(int i = 0;; i++) {
 | 
|---|
 | 90 |                 assert( i < file_cache.size );
 | 
|---|
 | 91 |                 cache_line & entry = file_cache.entries[idx];
 | 
|---|
 | 92 |                 if( !entry.file ) return [-1, 0];
 | 
|---|
 | 93 |                 #if !defined(REJECT_CONFLICTS)
 | 
|---|
 | 94 |                         if( strncmp(entry.file, file, len) != 0 ) {
 | 
|---|
 | 95 |                                 idx = (idx + 1) % file_cache.size;
 | 
|---|
 | 96 |                                 continue;
 | 
|---|
 | 97 |                         }
 | 
|---|
 | 98 |                 #endif
 | 
|---|
 | 99 |                 return [entry.fd, entry.size];
 | 
|---|
 | 100 |         }
 | 
|---|
 | 101 | }
 | 
|---|
 | 102 | 
 | 
|---|
| [d11d6eb] | 103 | int put_file( cache_line & entry, int fd ) {
 | 
|---|
| [2ecbd7b] | 104 |         uint32_t idx = murmur3_32( (const uint8_t *)entry.file, strlen(entry.file), options.file_cache.hash_seed ) % file_cache.size;
 | 
|---|
| [0aec496] | 105 | 
 | 
|---|
 | 106 |         int i = 0;
 | 
|---|
 | 107 |         for(;file_cache.entries[idx].file; i++ ) {
 | 
|---|
 | 108 |                 assert( i < file_cache.size );
 | 
|---|
 | 109 |                 idx = (idx + 1) % file_cache.size;
 | 
|---|
 | 110 |         }
 | 
|---|
 | 111 | 
 | 
|---|
 | 112 |         file_cache.entries[idx] = entry;
 | 
|---|
| [d11d6eb] | 113 |         file_cache.entries[idx].fd = fd;
 | 
|---|
| [0aec496] | 114 |         return i > 0 ? 1 : 0;
 | 
|---|
 | 115 | }
 | 
|---|
 | 116 | 
 | 
|---|
 | 117 | // int ftw(const char *dirpath, int (*fn) (const char *fpath, const struct stat *sb, int typeflag), int nopenfd)
 | 
|---|
 | 118 | void fill_cache( const char * path ) {
 | 
|---|
 | 119 |         int ret;
 | 
|---|
| [53e4562] | 120 |         ret = chdir(path);
 | 
|---|
 | 121 |         if(ret < 0) {
 | 
|---|
 | 122 |                 abort( "chdir error: (%d) %s\n", (int)errno, strerror(errno) );
 | 
|---|
 | 123 |         }
 | 
|---|
 | 124 | 
 | 
|---|
| [0aec496] | 125 |         size_t fcount = 0;
 | 
|---|
 | 126 |         size_t fsize = 16;
 | 
|---|
| [d11d6eb] | 127 |         cache_line * raw = alloc(fsize);
 | 
|---|
| [0aec496] | 128 |         // Step 1 get a dense array of all files
 | 
|---|
 | 129 |         int walk(const char *fpath, const struct stat *sb, int typeflag) {
 | 
|---|
 | 130 |                 if(typeflag != FTW_F) return 0;
 | 
|---|
 | 131 | 
 | 
|---|
 | 132 |                 int idx = fcount;
 | 
|---|
 | 133 |                 fcount++;
 | 
|---|
 | 134 |                 if(fcount > fsize) {
 | 
|---|
 | 135 |                         fsize *= 2;
 | 
|---|
| [d11d6eb] | 136 |                         raw = alloc(fsize, raw`realloc);
 | 
|---|
| [0aec496] | 137 |                 }
 | 
|---|
 | 138 | 
 | 
|---|
 | 139 |                 raw[idx].file = strdup(fpath+2);
 | 
|---|
 | 140 |                 raw[idx].size = sb->st_size;
 | 
|---|
| [2ecbd7b] | 141 |                 if( !options.file_cache.list ) {
 | 
|---|
 | 142 |                         raw[idx].fd = open( fpath, options.file_cache.open_flags );
 | 
|---|
| [53e4562] | 143 |                         if(raw[idx].fd < 0) {
 | 
|---|
 | 144 |                                 abort( "open file error: (%d) %s\n", (int)errno, strerror(errno) );
 | 
|---|
 | 145 |                         }
 | 
|---|
| [0aec496] | 146 |                 }
 | 
|---|
 | 147 |                 return 0;
 | 
|---|
 | 148 |         }
 | 
|---|
 | 149 | 
 | 
|---|
| [53e4562] | 150 |         ret = ftw(".", walk, 10);
 | 
|---|
| [0aec496] | 151 |         if(ret < 0) {
 | 
|---|
 | 152 |                 abort( "ftw error: (%d) %s\n", (int)errno, strerror(errno) );
 | 
|---|
 | 153 |         }
 | 
|---|
 | 154 | 
 | 
|---|
 | 155 |         if(fcount == 0) {
 | 
|---|
 | 156 |                 abort("No file found in path %s\n", path);
 | 
|---|
 | 157 |         }
 | 
|---|
 | 158 | 
 | 
|---|
 | 159 |         // Step 2 create the cache
 | 
|---|
| [2ecbd7b] | 160 |         file_cache.size = options.file_cache.size > 0 ? options.file_cache.size : fsize;
 | 
|---|
| [0aec496] | 161 |         if( file_cache.size < fcount ) {
 | 
|---|
 | 162 |                 abort("File Cache too small\n");
 | 
|---|
 | 163 |         }
 | 
|---|
 | 164 | 
 | 
|---|
| [2ecbd7b] | 165 |         file_cache.entries = anew(file_cache.size);
 | 
|---|
| [0aec496] | 166 | 
 | 
|---|
| [d11d6eb] | 167 |         if(options.file_cache.fixed_fds) {
 | 
|---|
 | 168 |                 file_cache.nfds   = fcount;
 | 
|---|
 | 169 |                 file_cache.rawfds = alloc(fcount);
 | 
|---|
 | 170 |         }
 | 
|---|
 | 171 | 
 | 
|---|
| [0aec496] | 172 |         // Step 3 fill the cache
 | 
|---|
 | 173 |         int conflicts = 0;
 | 
|---|
 | 174 |         for(i; fcount) {
 | 
|---|
| [d11d6eb] | 175 |                 int fd;
 | 
|---|
 | 176 |                 if(options.file_cache.fixed_fds) {
 | 
|---|
 | 177 |                         file_cache.rawfds[i] = raw[i].fd;
 | 
|---|
 | 178 |                         fd = i;
 | 
|---|
 | 179 |                 }
 | 
|---|
 | 180 |                 else {
 | 
|---|
 | 181 |                         fd = raw[i].fd;
 | 
|---|
 | 182 |                 }
 | 
|---|
 | 183 |                 conflicts += put_file( raw[i], fd );
 | 
|---|
| [0aec496] | 184 |         }
 | 
|---|
| [8c43d05] | 185 |         sout | "Filled cache from path \"" | path | "\" with" | fcount | "files";
 | 
|---|
| [0aec496] | 186 |         if( conflicts > 0 ) {
 | 
|---|
| [8c43d05] | 187 |                 sout | "Found" | conflicts | "conflicts (seed: " | options.file_cache.hash_seed | ")";
 | 
|---|
| [0aec496] | 188 |                 #if defined(REJECT_CONFLICTS)
 | 
|---|
 | 189 |                         abort("Conflicts found in the cache");
 | 
|---|
 | 190 |                 #endif
 | 
|---|
 | 191 |         }
 | 
|---|
 | 192 | 
 | 
|---|
| [2ecbd7b] | 193 |         if(options.file_cache.list) {
 | 
|---|
| [8c43d05] | 194 |                 sout | "Listing files and exiting";
 | 
|---|
| [2ecbd7b] | 195 |                 for(i; fcount) {
 | 
|---|
 | 196 |                         int s; char u;
 | 
|---|
 | 197 |                         [s, u] = human_size(raw[i].size);
 | 
|---|
| [8c43d05] | 198 |                         sout | s | u | "-" | raw[i].file;
 | 
|---|
| [2ecbd7b] | 199 |                         free(raw[i].file);
 | 
|---|
 | 200 |                 }
 | 
|---|
 | 201 |                 free(raw);
 | 
|---|
| [5936244] | 202 |                 adelete( file_cache.entries );
 | 
|---|
| [2ecbd7b] | 203 |                 exit(0);
 | 
|---|
 | 204 |         }
 | 
|---|
 | 205 | 
 | 
|---|
| [0aec496] | 206 |         // Step 4 clean up
 | 
|---|
 | 207 |         free( raw );
 | 
|---|
 | 208 | }
 | 
|---|
 | 209 | 
 | 
|---|
| [d9c2284] | 210 | [int *, int] filefds(int extra) {
 | 
|---|
| [b57db73] | 211 |         if(!options.file_cache.path) {
 | 
|---|
 | 212 |                 int * data = alloc(extra);
 | 
|---|
 | 213 |                 return [data, 0];
 | 
|---|
 | 214 |         }
 | 
|---|
 | 215 | 
 | 
|---|
| [d9c2284] | 216 |         if(!file_cache.entries) {
 | 
|---|
 | 217 |                 abort("File cache not filled!\n");
 | 
|---|
 | 218 |         }
 | 
|---|
 | 219 | 
 | 
|---|
| [d11d6eb] | 220 |         size_t s = file_cache.nfds + extra;
 | 
|---|
 | 221 |         int * data = alloc(s, file_cache.rawfds`realloc);
 | 
|---|
 | 222 |         return [data, file_cache.nfds];
 | 
|---|
| [d9c2284] | 223 | }
 | 
|---|
 | 224 | 
 | 
|---|
 | 225 | 
 | 
|---|
| [0aec496] | 226 | void close_cache() {
 | 
|---|
 | 227 |         for(idx; file_cache.size) {
 | 
|---|
 | 228 |                 cache_line & entry = file_cache.entries[idx];
 | 
|---|
 | 229 |                 if( !entry.file ) continue;
 | 
|---|
 | 230 | 
 | 
|---|
 | 231 |                 int ret = close( entry.fd );
 | 
|---|
 | 232 |                 if(ret < 0) {
 | 
|---|
 | 233 |                         abort( "close file error: (%d) %s\n", (int)errno, strerror(errno) );
 | 
|---|
 | 234 |                 }
 | 
|---|
 | 235 |         }
 | 
|---|
 | 236 | 
 | 
|---|
 | 237 |         delete( file_cache.entries );
 | 
|---|
 | 238 | }
 | 
|---|