Check-in [a719156faf]
Overview
Comment:Switch to perfect hashing for lookups, to save space
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: a719156faf54149b345a68ea3a37ccd92c89df887236c4dae6c60be8edac901e
User & Date: rkeene on 2019-10-09 20:37:25
Other Links: manifest | tags
Context
2019-10-10
00:40
Fix error in constraining to range check-in: c89b6aa781 user: rkeene tags: trunk
2019-10-09
20:37
Switch to perfect hashing for lookups, to save space check-in: a719156faf user: rkeene tags: trunk
2019-09-20
20:52
More profiling work check-in: 22c09ebad1 user: rkeene tags: trunk
Changes

Modified lib/xvfs/xvfs.c.rvt from [5748d80b1d] to [8a68b99ecb].

38
39
40
41
42
43
44
45
46
47



48
49

50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86

87
88
89
90
91
92
93
94
95
96
97
98


99
100
101
102
103
104
105
106
107






108
109
110
111
112



113
114
115
116
117
118
119
120
121

122
123
124
125
126

127
128
129
130
131
132
133
134
135
38
39
40
41
42
43
44



45
46
47
48

49
50
51
52
53
54
55
56
57







58
59









60
61









62












63
64
65
66
67
68
69




70
71
72
73
74
75





76
77
78









79





80


81
82
83
84
85
86
87







-
-
-
+
+
+

-
+








-
-
-
-
-
-
-


-
-
-
-
-
-
-
-
-


-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+





-
-
-
-
+
+
+
+
+
+
-
-
-
-
-
+
+
+
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
+
-
-







#define HAVE_DEFINED_XVFS_SIZE_T 1
typedef Tcl_WideInt xvfs_size_t;
#endif

#ifndef HAVE_DEFINED_XVFS_FILE_DATA
#define HAVE_DEFINED_XVFS_FILE_DATA 1
struct xvfs_file_data {
	const char          *name;
	xvfs_file_type_t    type;
	xvfs_size_t         size;
	const char * const        name;
	const xvfs_file_type_t    type;
	const xvfs_size_t         size;
	union {
		const unsigned char *fileContents;
		const unsigned char * const fileContents;
		const char          **dirChildren;
	} data;
};
#endif

<?
	package require xvfs

	set ::xvfs::hashNameThreshold 3
	if {[info exists ::env(XVFS_CREATE_HASH_NAME_THRESHOLD)]} {
		set ::xvfs::hashNameThreshold $::env(XVFS_CREATE_HASH_NAME_THRESHOLD)
	}
	if {$::xvfs::hashNameThreshold < 0} {
		set ::xvfs::hashNameThreshold [expr {2**31}]
	}
	xvfs::main $::xvfs::argv

	proc emitFilenameVerification {indentLevel outputFileNameLen outputFileIndexes} {
		set indent [string repeat "\t" $indentLevel]
		foreach outputFileIndex $outputFileIndexes {
?><?= $indent ?>if (memcmp(path, xvfs_<?= $::xvfs::fsName ?>_data[<?= $outputFileIndex ?>].name, <?= $outputFileNameLen ?>) == 0) {
<?= $indent ?>	return(<?= $outputFileIndex ?>);
<?= $indent ?>}
<?
		}
	}
?>
static long xvfs_<?= $::xvfs::fsName ?>_nameToIndex(const char *path) {
<?
	for {set index 0} {$index < [llength $::xvfs::outputFiles]} {incr index} {
		set outputFileName [lindex $::xvfs::outputFiles $index]
		set outputFileNameLen [string length $outputFileName]
		set outputFileNameHash [zlib adler32 $outputFileName 0]
		lappend outputFileNameHashToIndex([list $outputFileNameLen $outputFileNameHash]) $index
		lappend outputFileNameLenToIndex($outputFileNameLen) $index
	}

	long pathIndex;
	set needZlib false
	foreach {outputFileNameLen outputFileIndexes} [lsort -stride 2 -dictionary [array get outputFileNameLenToIndex]] {
		if {[llength $outputFileIndexes] > $::xvfs::hashNameThreshold} {
			set needZlib true
			break;
		}
	}
?><?
	if {$needZlib} {
?>	unsigned int pathHash;
<?	} ?>	size_t pathLen;
	
	ssize_t pathLen;

	if (path == NULL) {
		return(XVFS_NAME_LOOKUP_ERROR);
	}

	pathLen = strlen(path);
	switch (pathLen) {
<?

	foreach {outputFileNameLen outputFileIndexes} [lsort -stride 2 -dictionary [array get outputFileNameLenToIndex]] {

	pathIndex = <?= [::xvfs::generatePerfectHashFunctionCall "path" pathLen XVFS_NAME_LOOKUP_ERROR $::xvfs::outputFiles] ?>;
	if (pathIndex < 0 || pathIndex > <?= [llength $::xvfs::outputFiles] ?>) {
		pathIndex = XVFS_NAME_LOOKUP_ERROR;
	}

?>		case <?= $outputFileNameLen ?>:
<?
			if {[llength $outputFileIndexes] > $::xvfs::hashNameThreshold} {
?>			pathHash = Tcl_ZlibAdler32(0, (const unsigned char *) path, <?= $outputFileNameLen ?>);
			switch (pathHash) {
	if (pathIndex != XVFS_NAME_LOOKUP_ERROR) {
		if (strcmp(path, xvfs_<?= $::xvfs::fsName ?>_data[pathIndex].name) == 0) {
			return(pathIndex);
<?
				foreach {key outputFileIndexes} [lsort -stride 2 -dictionary [array get outputFileNameHashToIndex [list $outputFileNameLen *]]] {
					set outputFileNameHash [lindex $key 1]
?>				case <?= $outputFileNameHash ?>:
<?
					emitFilenameVerification 5 $outputFileNameLen $outputFileIndexes
?>					break;	
<?
				}
		}
?>			}
<?
			} else {
				emitFilenameVerification 3 $outputFileNameLen $outputFileIndexes
			}
	}
?>			break;
<?	} ?>	}
	
	return(XVFS_NAME_LOOKUP_ERROR);
}

static const char **xvfs_<?= $::xvfs::fsName ?>_getChildren(const char *path, Tcl_WideInt *count) {
	const struct xvfs_file_data *fileInfo;
	long inode;

Modified lib/xvfs/xvfs.tcl from [f096c687f1] to [6f620f3c29].

306
307
308
309
310
311
312






























































313
314
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+


}

proc ::xvfs::staticIncludeHeader {pathToHeaderFile} {
	set fd [open $pathToHeaderFile]
	::xvfs::staticIncludeHeaderData [read $fd]
	close $fd
}

proc ::xvfs::generatePerfectHashFunctionCall {cVarName cVarLength invalidValue nameList} {
	set minVal 0
	set maxVal [llength $nameList]
	set testExpr {([zlib adler32 $nameItem $alpha] + $beta) % $gamma}
	set testExprC {((Tcl_ZlibAdler32($alpha, (unsigned char *) $cVarName, $cVarLength) + $beta) % $gamma)}

	set round -1

	set beta 0
	set gamma $maxVal

	while true {
		incr round

		set alpha $round
		set gamma [expr {($round % ($maxVal + 1)) + $maxVal}]

		set idx -1
		set seenIndexes [list]
		set failed false
		foreach nameItem $nameList {
			incr idx

			set testExprVal [expr $testExpr]

			if {$testExprVal in $seenIndexes} {
				incr alpha
				set failed true
				break
			}

			lappend seenIndexes $testExprVal
		}

		if {!$failed} {
			break
		}
	}

	unset -nocomplain mapArray
	for {set idx 0} {$idx < $gamma} {incr idx} {
		set mapArray($idx) $invalidValue
	}

	set idx -1
	foreach nameItem $nameList {
		incr idx

		set mapArray([expr $testExpr]) $idx
	}

	set map "(long\[\])\{"
	for {set idx 0} {$idx < $gamma} {incr idx} {
		append map "$mapArray($idx), "
	}
	set map [string range $map 0 end-2]
	append map "\}\[[subst $testExprC]\]"
	set phfCall $map

	return $phfCall
}

package provide xvfs 1