Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Added Nakamichi |
---|---|
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
f996b5e7f000295df2ac3622d83965a4 |
User & Date: | m 2014-04-17 18:17:06 |
Context
2014-04-27
| ||
15:50 | Updated Nakamichi to round 5; Updated TODOs check-in: 5b6b454d6a user: m tags: trunk | |
2014-04-17
| ||
18:17 | Added Nakamichi check-in: f996b5e7f0 user: m tags: trunk | |
2014-04-10
| ||
17:10 | Added crush check-in: 2d4af53f63 user: m tags: trunk | |
Changes
Changes to src/CHANGELOG.
1 2 3 4 5 6 7 8 9 10 11 | 0.15 [+] added memcpy, memmove codecs [+] added crush [+] added zling [~] updated lz4 to r114 [~] updated LZJB to the latest FreeBSD version [~] updated tornado to 0.6a [-] removed xxhash256, it's dangerously weak [!] fixed integer underflow [!] fixed a segfault with nop codec | > | 1 2 3 4 5 6 7 8 9 10 11 12 | 0.15 [+] added memcpy, memmove codecs [+] added crush [+] added zling [+] added Nakamichi [~] updated lz4 to r114 [~] updated LZJB to the latest FreeBSD version [~] updated tornado to 0.6a [-] removed xxhash256, it's dangerously weak [!] fixed integer underflow [!] fixed a segfault with nop codec |
︙ | ︙ |
Changes to src/CMakeLists.txt.
︙ | ︙ | |||
33 34 35 36 37 38 39 40 41 42 43 44 45 46 | set(USE_LZV1 0) # inefficient set(USE_LZWC 0) # inefficient set(USE_LZX_COMPRESS 0) # inefficient, no decompressor set(USE_MINIHUFF 1) set(USE_MINIZ 1) set(USE_MMINI 0) # doesn't compile with MSVC10 set(USE_MURMUR 1) set(USE_NRV 0) # inefficient set(USE_QUICKLZ 1) set(USE_QUICKLZZIP 0) # quite good, but single-threaded. I may fix it one day. set(USE_RLE64 1) set(USE_SANMAYCE_FNV 1) set(USE_SCZ 0) # single threaded and far too inefficient to be worth fixing... set(USE_SHA3_RND1 0) # didn't make it to the final... | > | 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 | set(USE_LZV1 0) # inefficient set(USE_LZWC 0) # inefficient set(USE_LZX_COMPRESS 0) # inefficient, no decompressor set(USE_MINIHUFF 1) set(USE_MINIZ 1) set(USE_MMINI 0) # doesn't compile with MSVC10 set(USE_MURMUR 1) set(USE_NAKAMICHI 0) # inefficient set(USE_NRV 0) # inefficient set(USE_QUICKLZ 1) set(USE_QUICKLZZIP 0) # quite good, but single-threaded. I may fix it one day. set(USE_RLE64 1) set(USE_SANMAYCE_FNV 1) set(USE_SCZ 0) # single threaded and far too inefficient to be worth fixing... set(USE_SHA3_RND1 0) # didn't make it to the final... |
︙ | ︙ | |||
460 461 462 463 464 465 466 467 468 469 470 471 472 473 | set(OTHER_SRC ${OTHER_SRC} codecs/mmini/huffman.c codecs/mmini/lzl.c) add_definitions(-DFSBENCH_USE_MMINI) ENDIF(USE_MMINI) IF(USE_MURMUR) set(OTHER_SRC ${OTHER_SRC} codecs/MurmurHash3.cpp) add_definitions(-DFSBENCH_USE_MURMUR) ENDIF(USE_MURMUR) IF(USE_QUICKLZ) include_directories (${FSBENCH_SOURCE_DIR}/codecs/quicklz) set(OTHER_SRC ${OTHER_SRC} codecs/quicklz/quicklz1.c codecs/quicklz/quicklz2.c codecs/quicklz/quicklz3.c ) add_definitions(-DFSBENCH_USE_QUICKLZ) ENDIF(USE_QUICKLZ) IF(USE_QUICKLZZIP) include_directories (${FSBENCH_SOURCE_DIR}/codecs/quicklz) | > > > > | 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 | set(OTHER_SRC ${OTHER_SRC} codecs/mmini/huffman.c codecs/mmini/lzl.c) add_definitions(-DFSBENCH_USE_MMINI) ENDIF(USE_MMINI) IF(USE_MURMUR) set(OTHER_SRC ${OTHER_SRC} codecs/MurmurHash3.cpp) add_definitions(-DFSBENCH_USE_MURMUR) ENDIF(USE_MURMUR) IF(USE_NAKAMICHI) set(OTHER_SRC ${OTHER_SRC} codecs/Nakamichi.c) add_definitions(-DFSBENCH_USE_NAKAMICHI) ENDIF(USE_NAKAMICHI) IF(USE_QUICKLZ) include_directories (${FSBENCH_SOURCE_DIR}/codecs/quicklz) set(OTHER_SRC ${OTHER_SRC} codecs/quicklz/quicklz1.c codecs/quicklz/quicklz2.c codecs/quicklz/quicklz3.c ) add_definitions(-DFSBENCH_USE_QUICKLZ) ENDIF(USE_QUICKLZ) IF(USE_QUICKLZZIP) include_directories (${FSBENCH_SOURCE_DIR}/codecs/quicklz) |
︙ | ︙ |
Changes to src/README.
︙ | ︙ | |||
173 174 175 176 177 178 179 180 181 182 183 184 185 186 | mmini - lzl | Adam Ierymenko | 2010 | no | https://code.google.com/p/mmini LZMAT | Vitaly Evseenko | 1.1 | yes | www.matcode.com/lzmat.htm LZO | Markus Franz Xaver Johannes Oberhumer | 2.06 | yes | www.oberhumer.com/opensource/lzo LZSS | Ilia Muraviev | 2008-07-31 | no | http://encode.ru/threads/143-LZSS-v0-01-is-here! LZV1 | Hermann Vogt | 0.5 | no | http://encode.ru/threads/1661-LZWC-A-fast-tree-based-LZW-compressor lzx_compress | Matthew T. Russotto | 2005-07-06 | no | miniz | Richard Geldreich, Jr. | 1.11 | yes | https://code.google.com/p/miniz nrv2a | Markus Franz Xaver Johannes Oberhumer | 1.03 | no | www.oberhumer.com/opensource/ucl nrv2b | Markus Franz Xaver Johannes Oberhumer | 1.03 | no | www.oberhumer.com/opensource/ucl nrv2d | Markus Franz Xaver Johannes Oberhumer | 1.03 | no | www.oberhumer.com/opensource/ucl QuickLZ | Lasse Mikkel Reinhold | 1.5.1b6 | yes | www.quicklz.com QuickLZ zip | Lasse Mikkel Reinhold | 0.4 | no | www.quicklz.com/zip.html RLE64 | Javier GutiƩrrez Chamorro | R3.00 | yes | http://nikkhokkho.sourceforge.net/static.php?page=RLE64 SCZ | Carl Kindman | 2008-11-25 | no | | > | 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | mmini - lzl | Adam Ierymenko | 2010 | no | https://code.google.com/p/mmini LZMAT | Vitaly Evseenko | 1.1 | yes | www.matcode.com/lzmat.htm LZO | Markus Franz Xaver Johannes Oberhumer | 2.06 | yes | www.oberhumer.com/opensource/lzo LZSS | Ilia Muraviev | 2008-07-31 | no | http://encode.ru/threads/143-LZSS-v0-01-is-here! LZV1 | Hermann Vogt | 0.5 | no | http://encode.ru/threads/1661-LZWC-A-fast-tree-based-LZW-compressor lzx_compress | Matthew T. Russotto | 2005-07-06 | no | miniz | Richard Geldreich, Jr. | 1.11 | yes | https://code.google.com/p/miniz Nakamichi | Georgi Marinov | r1 | no | http://www.codeproject.com/Articles/250566/Fastest-strstr-like-function-in-C?msg=4800986#xx4800986xx nrv2a | Markus Franz Xaver Johannes Oberhumer | 1.03 | no | www.oberhumer.com/opensource/ucl nrv2b | Markus Franz Xaver Johannes Oberhumer | 1.03 | no | www.oberhumer.com/opensource/ucl nrv2d | Markus Franz Xaver Johannes Oberhumer | 1.03 | no | www.oberhumer.com/opensource/ucl QuickLZ | Lasse Mikkel Reinhold | 1.5.1b6 | yes | www.quicklz.com QuickLZ zip | Lasse Mikkel Reinhold | 0.4 | no | www.quicklz.com/zip.html RLE64 | Javier GutiƩrrez Chamorro | R3.00 | yes | http://nikkhokkho.sourceforge.net/static.php?page=RLE64 SCZ | Carl Kindman | 2008-11-25 | no | |
︙ | ︙ |
Changes to src/codecs.cpp.
︙ | ︙ | |||
257 258 259 260 261 262 263 264 265 266 267 268 269 270 | #ifdef FSBENCH_USE_LZWC new Codec("LZWC", _LZWC_VERSION, LZWC_c, LZWC_d, no_blowup), #endif #ifdef FSBENCH_USE_MMINI new BufferedCodec("mmini_huffman", _MMINI_VERSION, mmini_huffman_c, mmini_huffman_d, no_blowup, MMINI_HUFFHEAP_SIZE), new Codec("mmini_lzl", _MMINI_VERSION, mmini_lzl_c, mmini_lzl_d, no_blowup), #endif #ifdef FSBENCH_USE_QUICKLZZIP new Codec("QuickLZ-zip", _QLZZIP_VERSION, qlzzip_c, 0), #endif #ifdef FSBENCH_USE_SHRINKER new Codec("Shrinker", _SHRINKER_VERSION, Shrinker_c, Shrinker_d, Shrinker_m), #endif #ifdef FSBENCH_USE_SNAPPY | > > > | 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 | #ifdef FSBENCH_USE_LZWC new Codec("LZWC", _LZWC_VERSION, LZWC_c, LZWC_d, no_blowup), #endif #ifdef FSBENCH_USE_MMINI new BufferedCodec("mmini_huffman", _MMINI_VERSION, mmini_huffman_c, mmini_huffman_d, no_blowup, MMINI_HUFFHEAP_SIZE), new Codec("mmini_lzl", _MMINI_VERSION, mmini_lzl_c, mmini_lzl_d, no_blowup), #endif #ifdef FSBENCH_USE_NAKAMICHI new Codec("Nakamichi", _NAKAMICHI_VERSION, nakamichi_c, nakamichi_d), #endif #ifdef FSBENCH_USE_QUICKLZZIP new Codec("QuickLZ-zip", _QLZZIP_VERSION, qlzzip_c, 0), #endif #ifdef FSBENCH_USE_SHRINKER new Codec("Shrinker", _SHRINKER_VERSION, Shrinker_c, Shrinker_d, Shrinker_m), #endif #ifdef FSBENCH_USE_SNAPPY |
︙ | ︙ | |||
510 511 512 513 514 515 516 517 518 519 520 521 522 523 | make_pair(raw_find_codec("LZSS-IM"), ""), make_pair(raw_find_codec("lzv1"), ""), make_pair(raw_find_codec("lzwc"), ""), make_pair(find_codec("lzx_compress/nop"), ""), make_pair(raw_find_codec("miniz"), ""), make_pair(raw_find_codec("mmini_huffman"), ""), make_pair(raw_find_codec("mmini_lzl"), ""), make_pair(raw_find_codec("nrv2b"), ""), make_pair(raw_find_codec("nrv2d"), ""), make_pair(raw_find_codec("nrv2e"), ""), make_pair(raw_find_codec("QuickLZ"), ""), make_pair(find_codec("QuickLZ-zip/zlib"), ""), make_pair(raw_find_codec("RLE64"), ""), make_pair(raw_find_codec("scz"), ""), | > | 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 | make_pair(raw_find_codec("LZSS-IM"), ""), make_pair(raw_find_codec("lzv1"), ""), make_pair(raw_find_codec("lzwc"), ""), make_pair(find_codec("lzx_compress/nop"), ""), make_pair(raw_find_codec("miniz"), ""), make_pair(raw_find_codec("mmini_huffman"), ""), make_pair(raw_find_codec("mmini_lzl"), ""), make_pair(raw_find_codec("Nakamichi"), ""), make_pair(raw_find_codec("nrv2b"), ""), make_pair(raw_find_codec("nrv2d"), ""), make_pair(raw_find_codec("nrv2e"), ""), make_pair(raw_find_codec("QuickLZ"), ""), make_pair(find_codec("QuickLZ-zip/zlib"), ""), make_pair(raw_find_codec("RLE64"), ""), make_pair(raw_find_codec("scz"), ""), |
︙ | ︙ |
Changes to src/codecs.hpp.
1 2 3 4 5 6 7 8 9 10 11 | /** * Written by m^2. * You can consider the code to be public domain. * If your country doesn't recognize author's right to relieve themselves of copyright, * you can use it under the terms of WTFPL version 2.0 or later. * * * To add a codec: * 1. Implement a Codec class for it or - if applicable - functions required by one of abstract ones * 2. Add sources to CMakeFiles.txt * 3. Add codec version to codecs.hpp | < | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 | /** * Written by m^2. * You can consider the code to be public domain. * If your country doesn't recognize author's right to relieve themselves of copyright, * you can use it under the terms of WTFPL version 2.0 or later. * * * To add a codec: * 1. Implement a Codec class for it or - if applicable - functions required by one of abstract ones * 2. Add sources to CMakeFiles.txt * 3. Add codec version to codecs.hpp * 4. Add codec to all_compressors / all_checksums / all_ciphers in codecs.cpp * 5. Update readme * 6. Update changelog */ #ifndef CODECS_HPP_BhjgkfG8 #define CODECS_HPP_BhjgkfG8 #include <list> #include <exception> #include <string> |
︙ | ︙ | |||
199 200 201 202 203 204 205 206 207 208 209 210 211 212 | #define _LZSSIM_VERSION "2008-07-31" #define _LZV1_VERSION "0.5" #define _LZWC_VERSION "0.4" #define _LZX_VERSION "2005-07-06" #define _MINIZ_VERSION "1.11" #define _MMINI_VERSION "2012-12-23" #define _MURMUR_VERSION "2012-02-29" #define _NRV_VERSION "1.03" #define _QLZ_VERSION "1.5.1b6" #define _QLZZIP_VERSION "0.4" #define _RLE64_VERSION "R3.00" #define _SANMAYCE_VERSION "2013-06-16" #define _SIPHASH_VERSION "reference" #define _SCZ_VERSION "2008-11-25" | > | 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 | #define _LZSSIM_VERSION "2008-07-31" #define _LZV1_VERSION "0.5" #define _LZWC_VERSION "0.4" #define _LZX_VERSION "2005-07-06" #define _MINIZ_VERSION "1.11" #define _MMINI_VERSION "2012-12-23" #define _MURMUR_VERSION "2012-02-29" #define _NAKAMICHI_VERSION "r1" #define _NRV_VERSION "1.03" #define _QLZ_VERSION "1.5.1b6" #define _QLZZIP_VERSION "0.4" #define _RLE64_VERSION "R3.00" #define _SANMAYCE_VERSION "2013-06-16" #define _SIPHASH_VERSION "reference" #define _SCZ_VERSION "2008-11-25" |
︙ | ︙ |
Added src/codecs/Nakamichi.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 | // Nakamichi, revision 1-RSSBO, written by Kaze. // Based on Nobuo Ito's source, thanks Ito. // The main goal of Nakamichi is to allow supersimple and superfast decoding for English x-grams (mainly) in pure C, or not, heh-heh. // Natively Nakamichi is targeted as 64bit tool with 16 threads, helping Kazahana to traverse faster when I/O is not superior. // In short, Nakamichi is intended as x-gram decompressor. // Eightfold Path ~ the Buddhist path to nirvana, comprising eight aspects in which an aspirant must become practised; // eightfold way ~ (Physics), the grouping of hadrons into supermultiplets by means of SU(3)); (b) adverb to eight times the number or quantity: OE. // Note1: Fifteenth bit is not used, making the window wider by 1bit i.e. 32KB is not tempting, rather I think to use it as a flag: 8bytes/16bytes. // Note2: English x-grams are as English texts but more redundant, in other words they are phraselists in most cases, sometimes wordlists. // Note3: On OSHO.TXT, being a typical English text, Nakamichi's compression ratio is among the worsts: // 206,908,949 OSHO.TXT // 125,022,859 OSHO.TXT.Nakamichi // It struggles with English texts but decomprression speed is quite sweet (Core 2 T7500 2200MHz, 32bit code): // Nakamichi, revision 1-, written by Kaze. // Decompressing 125022859 bytes ... // RAM-to-RAM performance: 477681 KB/s. // Note4: Also I wanted to see how my 'Railgun_Swampshine_BailOut', being a HEAVYGUN i.e. with big overhead and latency, hits in a real-world application. // Quick notes on PAGODAs (the padded x-gram lists): // Every single word in English has its own PAGODA, in example below 'on' PAGODA is given (Kazahana_on.PAGODA-order-5.txt): // PAGODA order 5 (i.e. with 5 tiers) has 5*(5+1)/2=15 subtiers, they are concatenated and space-padded in order to form the pillar 'on': /* D:\_KAZE\Nakamichi_r1-RSSBO>dir \_GW\ka* 04/12/2014 05:07 AM 14 Kazahana_on.1-1.txt 04/12/2014 05:07 AM 1,635,389 Kazahana_on.2-1.txt 04/12/2014 05:07 AM 1,906,734 Kazahana_on.2-2.txt 04/12/2014 05:07 AM 10,891,415 Kazahana_on.3-1.txt 04/12/2014 05:07 AM 15,797,703 Kazahana_on.3-2.txt 04/12/2014 05:07 AM 20,419,280 Kazahana_on.3-3.txt 04/12/2014 05:07 AM 22,141,823 Kazahana_on.4-1.txt 04/12/2014 05:07 AM 36,002,113 Kazahana_on.4-2.txt 04/12/2014 05:07 AM 33,236,772 Kazahana_on.4-3.txt 04/12/2014 05:07 AM 33,902,425 Kazahana_on.4-4.txt 04/12/2014 05:07 AM 24,795,989 Kazahana_on.5-1.txt 04/12/2014 05:07 AM 30,766,220 Kazahana_on.5-2.txt 04/12/2014 05:07 AM 38,982,816 Kazahana_on.5-3.txt 04/12/2014 05:07 AM 38,089,575 Kazahana_on.5-4.txt 04/12/2014 05:07 AM 34,309,057 Kazahana_on.5-5.txt 04/12/2014 05:07 AM 846,351,894 Kazahana_on.PAGODA-order-5.txt D:\_KAZE\Nakamichi_r1-RSSBO>type \_GW\Kazahana_on.1-1.txt 9,999,999 on D:\_KAZE\Nakamichi_r1-RSSBO>type \_GW\Kazahana_on.2-1.txt 9,999,999 on_the 1,148,054 on_his 0,559,694 on_her 0,487,856 on_this 0,399,485 on_your 0,381,570 on_my 0,367,282 on_their ... D:\_KAZE\Nakamichi_r1-RSSBO>type \_GW\Kazahana_on.2-2.txt 0,545,191 based_on 0,397,408 and_on 0,334,266 go_on 0,329,561 went_on 0,263,035 was_on 0,246,332 it_on 0,229,041 down_on 0,202,151 going_on ... D:\_KAZE\Nakamichi_r1-RSSBO>type \_GW\Kazahana_on.5-5.txt 0,083,564 foundation_osho_s_books_on 0,012,404 medium_it_may_be_on 0,012,354 if_you_received_it_on 0,012,152 medium_they_may_be_on 0,012,144 agree_to_also_provide_on 0,012,139 a_united_states_copyright_on 0,008,067 we_are_constantly_working_on 0,008,067 questions_we_have_received_on 0,006,847 file_was_first_posted_on 0,006,441 of_we_are_already_on 0,006,279 you_received_this_ebook_on 0,005,865 you_received_this_etext_on 0,005,833 to_keep_an_eye_on ... D:\_KAZE\Nakamichi_r1-RSSBO>dir 04/12/2014 05:07 AM 846,351,894 Kazahana_on.PAGODA-order-5.txt D:\_KAZE\Nakamichi_r1-RSSBO>Nakamichi.exe Kazahana_on.PAGODA-order-5.txt Nakamichi, revision 1-RSSBO, written by Kaze. Compressing 846351894 bytes ... /; Each rotation means 128KB are encoded; Done 100% RAM-to-RAM performance: 512 KB/s. D:\_KAZE\Nakamichi_r1-RSSBO>dir 04/12/2014 05:07 AM 846,351,894 Kazahana_on.PAGODA-order-5.txt 04/15/2014 06:30 PM 293,049,398 Kazahana_on.PAGODA-order-5.txt.Nakamichi D:\_KAZE\Nakamichi_r1-RSSBO>Nakamichi.exe Kazahana_on.PAGODA-order-5.txt.Nakamichi Nakamichi, revision 1-RSSBO, written by Kaze. Decompressing 293049398 bytes ... RAM-to-RAM performance: 607 MB/s. D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe Kazahana_on.PAGODA-order-5.txt 4096 YAPPY: [b 4K] bytes 846351894 -> 191149889 22.6% comp 33.8 MB/s uncomp 875.4 MB/s D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe Kazahana_on.PAGODA-order-5.txt 8192 YAPPY: [b 8K] bytes 846351894 -> 184153244 21.8% comp 35.0 MB/s uncomp 898.3 MB/s D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe Kazahana_on.PAGODA-order-5.txt 16384 YAPPY: [b 16K] bytes 846351894 -> 180650931 21.3% comp 28.8 MB/s uncomp 906.4 MB/s D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe Kazahana_on.PAGODA-order-5.txt 32768 YAPPY: [b 32K] bytes 846351894 -> 178902966 21.1% comp 35.0 MB/s uncomp 906.4 MB/s D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe Kazahana_on.PAGODA-order-5.txt 65536 YAPPY: [b 64K] bytes 846351894 -> 178027899 21.0% comp 34.5 MB/s uncomp 914.6 MB/s D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe Kazahana_on.PAGODA-order-5.txt 131072 YAPPY: [b 128K] bytes 846351894 -> 177591807 21.0% comp 34.9 MB/s uncomp 906.4 MB/s D:\_KAZE\Nakamichi_r1-RSSBO> */ #include <stdio.h> #include <stdlib.h> #include <stdint.h> // uint64_t needed #include <time.h> //#include <emmintrin.h> // SSE2 intrinsics //#include <smmintrin.h> // SSE4.1 intrinsics //#include <immintrin.h> // AVX intrinsics //void SlowCopy128bit (const char *SOURCE, char *TARGET) { _mm_storeu_si128((__m128i *)(TARGET), _mm_loadu_si128((const __m128i *)(SOURCE))); } #ifndef NULL #define NULL ((void*)0) #endif // Comment it to see how slower 'BruteForce' is, for Wikipedia 100MB the ratio is 41KB/s versus 197KB/s. #define ReplaceBruteForceWithRailgunSwampshineBailOut void SearchIntoSlidingWindow(unsigned int* retIndex, unsigned int* retMatch, char* refStart,char* refEnd,char* encStart,char* encEnd); unsigned int SlidingWindowVsLookAheadBuffer(char* refStart, char* refEnd, char* encStart, char* encEnd); unsigned int Compress(char* ret, char* src, unsigned int srcSize); unsigned int Decompress(char* ret, char* src, unsigned int srcSize); char * Railgun_Swampshine_BailOut(char * pbTarget, char * pbPattern, uint32_t cbTarget, uint32_t cbPattern); // Min_Match_Length=THRESHOLD=4 means 4 and bigger are to be encoded: #define Min_Match_BAILOUT_Length (8) #define Min_Match_Length (8) #define OffsetBITS (14) #define LengthBITS (1) //12bit //#define REF_SIZE (4095+Min_Match_Length) #define REF_SIZE ( ((1<<OffsetBITS)-1) + Min_Match_Length ) //3bit //#define ENC_SIZE (7+Min_Match_Length) #define ENC_SIZE ( ((1<<LengthBITS)-1) + Min_Match_Length ) /* int main( int argc, char *argv[] ) { FILE *fp; int SourceSize; int TargetSize; char* SourceBlock=NULL; char* TargetBlock=NULL; char* Nakamichi = ".Nakamichi\0"; char NewFileName[256]; clock_t clocks1, clocks2; printf("Nakamichi, revision 1-RSSBO, written by Kaze.\n"); if (argc==1) { printf("Usage: Nakamichi filename\n"); exit(13); } if ((fp = fopen(argv[1], "rb")) == NULL) { printf("Nakamichi: Can't open '%s' file.\n", argv[1]); exit(13); } fseek(fp, 0, SEEK_END); SourceSize = ftell(fp); fseek(fp, 0, SEEK_SET); // If filename ends in '.Nakamichi' then mode is decompression otherwise compression. if (strcmp(argv[1]+(strlen(argv[1])-strlen(Nakamichi)), Nakamichi) == 0) { SourceBlock = (char*)malloc(SourceSize+512); TargetBlock = (char*)malloc(5*SourceSize+512); fread(SourceBlock, 1, SourceSize, fp); fclose(fp); printf("Decompressing %d bytes ...\n", SourceSize ); clocks1 = clock(); TargetSize = Decompress(TargetBlock, SourceBlock, SourceSize); clocks2 = clock(); printf("RAM-to-RAM performance: %d MB/s.\n", ((TargetSize/(clocks2 - clocks1 + 1))*(long)1000)>>20); strcpy(NewFileName, argv[1]); *( NewFileName + strlen(argv[1])-strlen(Nakamichi) ) = '\0'; } else { SourceBlock = (char*)malloc(SourceSize+512); TargetBlock = (char*)malloc(SourceSize+512); fread(SourceBlock, 1, SourceSize, fp); fclose(fp); printf("Compressing %d bytes ...\n", SourceSize ); clocks1 = clock(); TargetSize = Compress(TargetBlock, SourceBlock, SourceSize); clocks2 = clock(); printf("RAM-to-RAM performance: %d KB/s.\n", ((SourceSize/(clocks2 - clocks1 + 1))*(long)1000)>>10); strcpy(NewFileName, argv[1]); strcat(NewFileName, Nakamichi); } if ((fp = fopen(NewFileName, "wb")) == NULL) { printf("Nakamichi: Can't write '%s' file.\n", NewFileName); exit(13); } fwrite(TargetBlock, 1, TargetSize, fp); fclose(fp); free(TargetBlock); free(SourceBlock); exit(0); } */ void SearchIntoSlidingWindow(unsigned int* retIndex, unsigned int* retMatch, char* refStart,char* refEnd,char* encStart,char* encEnd){ char* FoundAtPosition; unsigned int match=0; *retIndex=0; *retMatch=0; #ifdef ReplaceBruteForceWithRailgunSwampshineBailOut if (refStart < refEnd) { FoundAtPosition = Railgun_Swampshine_BailOut(refStart, encStart, (uint32_t)(refEnd-refStart), 8); if (FoundAtPosition!=NULL) { *retMatch=8; *retIndex=refEnd-FoundAtPosition; } } #else while(refStart < refEnd){ match=SlidingWindowVsLookAheadBuffer(refStart,refEnd,encStart,encEnd); if(match > *retMatch){ *retMatch=match; *retIndex=refEnd-refStart; } if(*retMatch >= Min_Match_BAILOUT_Length) break; refStart++; } #endif } unsigned int SlidingWindowVsLookAheadBuffer( char* refStart, char* refEnd, char* encStart,char* encEnd){ int ret = 0; while(refStart[ret] == encStart[ret]){ if(&refStart[ret] >= refEnd) break; if(&encStart[ret] >= encEnd) break; ret++; if(ret >= Min_Match_BAILOUT_Length) break; } return ret; } unsigned int Compress(char* ret, char* src, unsigned int srcSize){ unsigned int srcIndex=0; unsigned int retIndex=0; unsigned int index=0; unsigned int match=0; unsigned int notMatch=0; char* notMatchStart=NULL; char* refStart=NULL; char* encEnd=NULL; int Melnitchka=0; char *Auberge[4] = {"|\0","/\0","-\0","\\\0"}; int ProgressIndicator; while(srcIndex < srcSize){ if(srcIndex>=REF_SIZE) refStart=&src[srcIndex-REF_SIZE]; else refStart=src; if(srcIndex>=srcSize-ENC_SIZE) encEnd=&src[srcSize]; else encEnd=&src[srcIndex+ENC_SIZE]; SearchIntoSlidingWindow(&index,&match,refStart,&src[srcIndex],&src[srcIndex],encEnd); //if ( match<Min_Match_Length ) { //if ( match<Min_Match_Length || match<8 ) { if ( match<8 ) { if(notMatch==0){ notMatchStart=&ret[retIndex]; retIndex++; } else if (notMatch==127) { *notMatchStart=127; notMatch=0; notMatchStart=&ret[retIndex]; retIndex++; } ret[retIndex]=src[srcIndex]; retIndex++; notMatch++; srcIndex++; } else { match = 8; if(notMatch > 0){ *notMatchStart=notMatch; notMatch=0; } ret[retIndex] = 0x80; // 1bit+3bits+12bits: //ret[retIndex] = ret[retIndex] | ((match-Min_Match_Length)<<4); //ret[retIndex] = ret[retIndex] | (((index-Min_Match_Length) & 0x0F00)>>8); // 1bit+1bit+14bits: ret[retIndex] = ret[retIndex] | ((match-Min_Match_Length)<<(8-(LengthBITS+1))); ret[retIndex] = ret[retIndex] | (((index-Min_Match_Length) & 0x3F00)>>8); // 2+4+8=14 retIndex++; ret[retIndex] = (char)((index-Min_Match_Length) & 0x00FF); retIndex++; srcIndex+=match; } if (srcIndex % (1<<17) == 0) { ProgressIndicator = (int)( (srcIndex+1)*(float)100/(srcSize+1) ); //printf("%s; Each rotation means 128KB are encoded; Done %d%%\r", Auberge[Melnitchka++], ProgressIndicator ); Melnitchka = Melnitchka & 3; // 0 1 2 3: 00 01 10 11 } } if(notMatch > 0){ *notMatchStart=notMatch; } //printf("%s; Each rotation means 128KB are encoded; Done %d%%\n", Auberge[Melnitchka], 100 ); return retIndex; } unsigned int Decompress(char* ret, char* src, unsigned int srcSize){ unsigned int srcIndex=0; unsigned int retIndex=0; unsigned int index=0; unsigned int match=0; while(srcIndex < srcSize){ if((unsigned char)src[srcIndex] <= 127){ memcpy(&ret[retIndex],&src[srcIndex+1],src[srcIndex]); // Use padding and replace 'memcpy' with loop of 4 or 4+4 transfers/stores i.e. *()=DWORD retIndex+=src[srcIndex]; srcIndex+=(src[srcIndex]+1); } else{ // 1bit+3bits+12bits: //match = ((src[srcIndex] & 0x7F) >> 4)+Min_Match_Length; //index = (src[srcIndex] & 0x0F) << 8; // 1bit+1bit+14bits: //match = ((src[srcIndex] & 0x4F) >> 4)+Min_Match_Length; // In fact, not needed when eightfoldness is commenced, match is 8. match=8; // or 16 in next revision. index = (src[srcIndex] & 0x3F) << 8; srcIndex++; index = (index | (unsigned int)(0x00FF & src[srcIndex])) + Min_Match_Length; srcIndex++; //memcpy(&ret[retIndex],&ret[retIndex-index],match); // Replace 'memcpy' with 4 or 4+4 transfer/store i.e. *()=DWORD //*(uint32_t*)(ret+retIndex) = *(uint32_t*)(ret+retIndex-index); //*(uint32_t*)(ret+retIndex+4) = *(uint32_t*)(ret+retIndex-index+4); //*(uint64_t*)(ret+retIndex) = *(uint64_t*)(ret+retIndex-index); *(uint64_t*)(ret+retIndex) = *(uint64_t*)(ret+retIndex-index); retIndex+=match; } } return retIndex; } // Decompression main loop, 84-2e+2=88 bytes long: /* ; mark_description "Intel(R) C++ Compiler XE for applications running on IA-32, Version 12.1.1.258 Build 20111011"; ; mark_description "-O3 -FAcs"; .B5.3: 0002e 8d 14 1f lea edx, DWORD PTR [edi+ebx] 00031 0f be 0a movsx ecx, BYTE PTR [edx] 00034 0f b6 c1 movzx eax, cl 00037 83 f8 7f cmp eax, 127 0003a 7e 27 jle .B5.5 .B5.4: 0003c 83 e1 3f and ecx, 63 0003f 83 c3 02 add ebx, 2 00042 c1 e1 08 shl ecx, 8 00045 0f b6 42 01 movzx eax, BYTE PTR [1+edx] 00049 0b c8 or ecx, eax 0004b f7 d9 neg ecx 0004d 8d 54 35 00 lea edx, DWORD PTR [ebp+esi] 00051 83 c6 08 add esi, 8 00054 8b 44 11 f8 mov eax, DWORD PTR [-8+ecx+edx] 00058 8b 4c 11 fc mov ecx, DWORD PTR [-4+ecx+edx] 0005c 89 02 mov DWORD PTR [edx], eax 0005e 89 4a 04 mov DWORD PTR [4+edx], ecx 00061 eb 1d jmp .B5.7 .B5.5: 00063 51 push ecx 00064 8d 44 1f 01 lea eax, DWORD PTR [1+edi+ebx] 00068 50 push eax 00069 8d 54 35 00 lea edx, DWORD PTR [ebp+esi] 0006d 52 push edx 0006e e8 fc ff ff ff call __intel_fast_memcpy .B5.12: 00073 83 c4 0c add esp, 12 .B5.6: 00076 0f be 0c 3b movsx ecx, BYTE PTR [ebx+edi] 0007a 03 f1 add esi, ecx 0007c 8d 5c 0b 01 lea ebx, DWORD PTR [1+ebx+ecx] .B5.7: 00080 3b 5c 24 1c cmp ebx, DWORD PTR [28+esp] 00084 72 a8 jb .B5.3 */ // The Speed Showdown on my 'Bonboniera' laptop (Core 2 T7500 2200MHz, Windows 7 64bit): // Grrr, in round #1 Yappy kicked my ass, yet, 4 more rounds remain... /* D:\_KAZE\Nakamichi_r1-RSSBO>NakamichiVsYappy.bat Speed Showdown: Nakamichi VS Yappy, both compiled with Intel v12.1 ... D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe enwiki-20140304-pages-articles.7z.001 1024 YAPPY: [b 1K] bytes 104857600 -> 78039768 74.4% comp 42.7 MB/s uncomp 680.3 MB/s D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe enwiki-20140304-pages-articles.7z.001 2048 YAPPY: [b 2K] bytes 104857600 -> 70845249 67.6% comp 39.8 MB/s uncomp 623.3 MB/s D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe enwiki-20140304-pages-articles.7z.001 4096 YAPPY: [b 4K] bytes 104857600 -> 64270963 61.3% comp 36.2 MB/s uncomp 583.1 MB/s D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe enwiki-20140304-pages-articles.7z.001 8192 YAPPY: [b 8K] bytes 104857600 -> 59756354 57.0% comp 32.9 MB/s uncomp 549.5 MB/s D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe enwiki-20140304-pages-articles.7z.001 16384 YAPPY: [b 16K] bytes 104857600 -> 57497885 54.8% comp 31.4 MB/s uncomp 541.5 MB/s D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe enwiki-20140304-pages-articles.7z.001 32768 YAPPY: [b 32K] bytes 104857600 -> 56365696 53.8% comp 31.1 MB/s uncomp 541.5 MB/s D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe enwiki-20140304-pages-articles.7z.001 65536 YAPPY: [b 64K] bytes 104857600 -> 55799079 53.2% comp 31.3 MB/s uncomp 534.3 MB/s D:\_KAZE\Nakamichi_r1-RSSBO>Yappy.exe enwiki-20140304-pages-articles.7z.001 262144 YAPPY: [b 256K] bytes 104857600 -> 55377127 52.8% comp 31.4 MB/s uncomp 534.3 MB/s D:\_KAZE\Nakamichi_r1-RSSBO>Nakamichi.exe enwiki-20140304-pages-articles.7z.001 Nakamichi, revision 1-RSSBO, written by Kaze. Compressing 104857600 bytes ... -; Each rotation means 128KB are encoded; Done 100% RAM-to-RAM performance: 198 KB/s. D:\_KAZE\Nakamichi_r1-RSSBO>Nakamichi.exe enwiki-20140304-pages-articles.7z.001.Nakamichi Nakamichi, revision 1-RSSBO, written by Kaze. Decompressing 70533827 bytes ... RAM-to-RAM performance: 531 MB/s. D:\_KAZE\Nakamichi_r1-RSSBO> */ // In my opinion Hamid Buzidi is the best, therefore his lzturbo v1.1 reference results are given below: /* D:\_KAZE\Nakamichi_r1-RSSBO>timer32 lzturbo.exe -19 -p0 enwiki-20140304-pages-articles.7z.001 . Kernel Time = 0.982 = 0% User Time = 152.537 = 99% Process Time = 153.520 = 100% Virtual Memory = 429 MB Global Time = 153.519 = 100% Physical Memory = 407 MB D:\_KAZE\Nakamichi_r1-RSSBO>timer32.exe lzturbo.exe -d enwiki-20140304-pages-articles.7z.001.lzt . Kernel Time = 0.234 = 62% User Time = 0.187 = 50% Process Time = 0.421 = 112% Virtual Memory = 98 MB Global Time = 0.374 = 100% Physical Memory = 70 MB D:\_KAZE\Nakamichi_r1-RSSBO>dir 04/15/2014 08:05 AM 104,857,600 enwiki-20140304-pages-articles.7z.001 04/15/2014 08:04 AM 41,984,881 enwiki-20140304-pages-articles.7z.001.lzt D:\_KAZE\Nakamichi_r1-RSSBO>timer32 lzturbo.exe -11 -p0 enwiki-20140304-pages-articles.7z.001 . Kernel Time = 0.171 = 9% User Time = 1.622 = 90% Process Time = 1.794 = 100% Virtual Memory = 58 MB Global Time = 1.794 = 100% Physical Memory = 39 MB D:\_KAZE\Nakamichi_r1-RSSBO>timer32.exe lzturbo.exe -d enwiki-20140304-pages-articles.7z.001.lzt . Kernel Time = 0.249 = 41% User Time = 0.140 = 23% Process Time = 0.390 = 64% Virtual Memory = 98 MB Global Time = 0.608 = 100% Physical Memory = 73 MB D:\_KAZE\Nakamichi_r1-RSSBO>dir 04/15/2014 08:05 AM 104,857,600 enwiki-20140304-pages-articles.7z.001 04/15/2014 08:05 AM 47,685,453 enwiki-20140304-pages-articles.7z.001.lzt D:\_KAZE\Nakamichi_r1-RSSBO> */ // Railgun_Swampshine_BailOut, copyleft 2014-Jan-31, Kaze. // Caution: For better speed the case 'if (cbPattern==1)' was removed, so Pattern must be longer than 1 char. #define NeedleThreshold2vs4swampLITE 9+10 // Should be bigger than 9. BMH2 works up to this value (inclusive), if bigger then BMH4 takes over. char * Railgun_Swampshine_BailOut (char * pbTarget, char * pbPattern, uint32_t cbTarget, uint32_t cbPattern) { char * pbTargetMax = pbTarget + cbTarget; register uint32_t ulHashPattern; signed long count; unsigned char bm_Horspool_Order2[256*256]; // Bitwise soon... uint32_t i, Gulliver; uint32_t PRIMALposition, PRIMALpositionCANDIDATE; uint32_t PRIMALlength, PRIMALlengthCANDIDATE; uint32_t j, FoundAtPosition; if (cbPattern > cbTarget) return(NULL); if ( cbPattern<4 ) { // SSE2 i.e. 128bit Assembly rules here: // ... pbTarget = pbTarget+cbPattern; ulHashPattern = ( (*(char *)(pbPattern))<<8 ) + *(pbPattern+(cbPattern-1)); if ( cbPattern==3 ) { for ( ;; ) { if ( ulHashPattern == ( (*(char *)(pbTarget-3))<<8 ) + *(pbTarget-1) ) { if ( *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) return((pbTarget-3)); } if ( (char)(ulHashPattern>>8) != *(pbTarget-2) ) { pbTarget++; if ( (char)(ulHashPattern>>8) != *(pbTarget-2) ) pbTarget++; } pbTarget++; if (pbTarget > pbTargetMax) return(NULL); } } else { } for ( ;; ) { if ( ulHashPattern == ( (*(char *)(pbTarget-2))<<8 ) + *(pbTarget-1) ) return((pbTarget-2)); if ( (char)(ulHashPattern>>8) != *(pbTarget-1) ) pbTarget++; pbTarget++; if (pbTarget > pbTargetMax) return(NULL); } } else { //if ( cbPattern<4 ) if ( cbPattern<=NeedleThreshold2vs4swampLITE ) { // BMH order 2, needle should be >=4: ulHashPattern = *(uint32_t *)(pbPattern); // First four bytes for (i=0; i < 256*256; i++) {bm_Horspool_Order2[i]=0;} for (i=0; i < cbPattern-1; i++) bm_Horspool_Order2[*(unsigned short *)(pbPattern+i)]=1; i=0; while (i <= cbTarget-cbPattern) { Gulliver = 1; // 'Gulliver' is the skip if ( bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+cbPattern-1-1]] != 0 ) { if ( bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+cbPattern-1-1-2]] == 0 ) Gulliver = cbPattern-(2-1)-2; else { if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) { // This fast check ensures not missing a match (for remainder) when going under 0 in loop below: count = cbPattern-4+1; while ( count > 0 && *(uint32_t *)(pbPattern+count-1) == *(uint32_t *)(&pbTarget[i]+(count-1)) ) count = count-4; if ( count <= 0 ) return(pbTarget+i); } } } else Gulliver = cbPattern-(2-1); i = i + Gulliver; //GlobalI++; // Comment it, it is only for stats. } return(NULL); } else { // if ( cbPattern<=NeedleThreshold2vs4swampLITE ) // Swampwalker_BAILOUT heuristic order 4 (Needle should be bigger than 4) [ // Needle: 1234567890qwertyuiopasdfghjklzxcv PRIMALposition=01 PRIMALlength=33 '1234567890qwertyuiopasdfghjklzxcv' // Needle: vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv PRIMALposition=29 PRIMALlength=04 'vvvv' // Needle: vvvvvvvvvvBOOMSHAKALAKAvvvvvvvvvv PRIMALposition=08 PRIMALlength=20 'vvvBOOMSHAKALAKAvvvv' // Needle: Trollland PRIMALposition=01 PRIMALlength=09 'Trollland' // Needle: Swampwalker PRIMALposition=01 PRIMALlength=11 'Swampwalker' // Needle: licenselessness PRIMALposition=01 PRIMALlength=15 'licenselessness' // Needle: alfalfa PRIMALposition=02 PRIMALlength=06 'lfalfa' // Needle: Sandokan PRIMALposition=01 PRIMALlength=08 'Sandokan' // Needle: shazamish PRIMALposition=01 PRIMALlength=09 'shazamish' // Needle: Simplicius Simplicissimus PRIMALposition=06 PRIMALlength=20 'icius Simplicissimus' // Needle: domilliaquadringenquattuorquinquagintillion PRIMALposition=01 PRIMALlength=32 'domilliaquadringenquattuorquinqu' // Needle: boom-boom PRIMALposition=02 PRIMALlength=08 'oom-boom' // Needle: vvvvv PRIMALposition=01 PRIMALlength=04 'vvvv' // Needle: 12345 PRIMALposition=01 PRIMALlength=05 '12345' // Needle: likey-likey PRIMALposition=03 PRIMALlength=09 'key-likey' // Needle: BOOOOOM PRIMALposition=03 PRIMALlength=05 'OOOOM' // Needle: aaaaaBOOOOOM PRIMALposition=02 PRIMALlength=09 'aaaaBOOOO' // Needle: BOOOOOMaaaaa PRIMALposition=03 PRIMALlength=09 'OOOOMaaaa' PRIMALlength=0; for (i=0+(1); i < cbPattern-((4)-1)+(1)-(1); i++) { // -(1) because the last BB order 4 has no counterpart(s) FoundAtPosition = cbPattern - ((4)-1) + 1; PRIMALpositionCANDIDATE=i; while ( PRIMALpositionCANDIDATE <= (FoundAtPosition-1) ) { j = PRIMALpositionCANDIDATE + 1; while ( j <= (FoundAtPosition-1) ) { if ( *(uint32_t *)(pbPattern+PRIMALpositionCANDIDATE-(1)) == *(uint32_t *)(pbPattern+j-(1)) ) FoundAtPosition = j; j++; } PRIMALpositionCANDIDATE++; } PRIMALlengthCANDIDATE = (FoundAtPosition-1)-i+1 +((4)-1); if (PRIMALlengthCANDIDATE >= PRIMALlength) {PRIMALposition=i; PRIMALlength = PRIMALlengthCANDIDATE;} if (cbPattern-i+1 <= PRIMALlength) break; if (PRIMALlength > 128) break; // Bail Out for 129[+] } // Swampwalker_BAILOUT heuristic order 4 (Needle should be bigger than 4) ] // Here we have 4 or bigger NewNeedle, apply order 2 for pbPattern[i+(PRIMALposition-1)] with length 'PRIMALlength' and compare the pbPattern[i] with length 'cbPattern': PRIMALlengthCANDIDATE = cbPattern; cbPattern = PRIMALlength; pbPattern = pbPattern + (PRIMALposition-1); // Revision 2 commented section [ /* if (cbPattern-1 <= 255) { // BMH Order 2 [ ulHashPattern = *(uint32_t *)(pbPattern); // First four bytes for (i=0; i < 256*256; i++) {bm_Horspool_Order2[i]= cbPattern-1;} // cbPattern-(Order-1) for Horspool; 'memset' if not optimized for (i=0; i < cbPattern-1; i++) bm_Horspool_Order2[*(unsigned short *)(pbPattern+i)]=i; // Rightmost appearance/position is needed i=0; while (i <= cbTarget-cbPattern) { Gulliver = bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+cbPattern-1-1]]; if ( Gulliver != cbPattern-1 ) { // CASE #2: if equal means the pair (char order 2) is not found i.e. Gulliver remains intact, skip the whole pattern and fall back (Order-1) chars i.e. one char for Order 2 if ( Gulliver == cbPattern-2 ) { // CASE #1: means the pair (char order 2) is found if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) { count = cbPattern-4+1; while ( count > 0 && *(uint32_t *)(pbPattern+count-1) == *(uint32_t *)(&pbTarget[i]+(count-1)) ) count = count-4; // If we miss to hit then no need to compare the original: Needle if ( count <= 0 ) { // I have to add out-of-range checks... // i-(PRIMALposition-1) >= 0 // &pbTarget[i-(PRIMALposition-1)] <= pbTargetMax - 4 // i-(PRIMALposition-1)+(count-1) >= 0 // &pbTarget[i-(PRIMALposition-1)+(count-1)] <= pbTargetMax - 4 if ( (i-(PRIMALposition-1) >= 0) && (&pbTarget[i-(PRIMALposition-1)] <= pbTargetMax - 4) && (&pbTarget[i-(PRIMALposition-1)+(count-1)] <= pbTargetMax - 4) ) { if ( *(uint32_t *)&pbTarget[i-(PRIMALposition-1)] == *(uint32_t *)(pbPattern-(PRIMALposition-1))) { // This fast check ensures not missing a match (for remainder) when going under 0 in loop below: count = PRIMALlengthCANDIDATE-4+1; while ( count > 0 && *(uint32_t *)(pbPattern-(PRIMALposition-1)+count-1) == *(uint32_t *)(&pbTarget[i-(PRIMALposition-1)]+(count-1)) ) count = count-4; if ( count <= 0 ) return(pbTarget+i-(PRIMALposition-1)); } } } } Gulliver = 1; } else Gulliver = cbPattern - Gulliver - 2; // CASE #3: the pair is found and not as suffix i.e. rightmost position } i = i + Gulliver; //GlobalI++; // Comment it, it is only for stats. } return(NULL); // BMH Order 2 ] } else { // BMH order 2, needle should be >=4: ulHashPattern = *(uint32_t *)(pbPattern); // First four bytes for (i=0; i < 256*256; i++) {bm_Horspool_Order2[i]=0;} for (i=0; i < cbPattern-1; i++) bm_Horspool_Order2[*(unsigned short *)(pbPattern+i)]=1; i=0; while (i <= cbTarget-cbPattern) { Gulliver = 1; // 'Gulliver' is the skip if ( bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+cbPattern-1-1]] != 0 ) { if ( bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+cbPattern-1-1-2]] == 0 ) Gulliver = cbPattern-(2-1)-2; else { if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) { // This fast check ensures not missing a match (for remainder) when going under 0 in loop below: count = cbPattern-4+1; while ( count > 0 && *(uint32_t *)(pbPattern+count-1) == *(uint32_t *)(&pbTarget[i]+(count-1)) ) count = count-4; // If we miss to hit then no need to compare the original: Needle if ( count <= 0 ) { // I have to add out-of-range checks... // i-(PRIMALposition-1) >= 0 // &pbTarget[i-(PRIMALposition-1)] <= pbTargetMax - 4 // i-(PRIMALposition-1)+(count-1) >= 0 // &pbTarget[i-(PRIMALposition-1)+(count-1)] <= pbTargetMax - 4 if ( (i-(PRIMALposition-1) >= 0) && (&pbTarget[i-(PRIMALposition-1)] <= pbTargetMax - 4) && (&pbTarget[i-(PRIMALposition-1)+(count-1)] <= pbTargetMax - 4) ) { if ( *(uint32_t *)&pbTarget[i-(PRIMALposition-1)] == *(uint32_t *)(pbPattern-(PRIMALposition-1))) { // This fast check ensures not missing a match (for remainder) when going under 0 in loop below: count = PRIMALlengthCANDIDATE-4+1; while ( count > 0 && *(uint32_t *)(pbPattern-(PRIMALposition-1)+count-1) == *(uint32_t *)(&pbTarget[i-(PRIMALposition-1)]+(count-1)) ) count = count-4; if ( count <= 0 ) return(pbTarget+i-(PRIMALposition-1)); } } } } } } else Gulliver = cbPattern-(2-1); i = i + Gulliver; //GlobalI++; // Comment it, it is only for stats. } return(NULL); } */ // Revision 2 commented section ] if ( cbPattern<=NeedleThreshold2vs4swampLITE ) { // BMH order 2, needle should be >=4: ulHashPattern = *(uint32_t *)(pbPattern); // First four bytes for (i=0; i < 256*256; i++) {bm_Horspool_Order2[i]=0;} for (i=0; i < cbPattern-1; i++) bm_Horspool_Order2[*(unsigned short *)(pbPattern+i)]=1; i=0; while (i <= cbTarget-cbPattern) { Gulliver = 1; // 'Gulliver' is the skip if ( bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+cbPattern-1-1]] != 0 ) { if ( bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+cbPattern-1-1-2]] == 0 ) Gulliver = cbPattern-(2-1)-2; else { if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) { // This fast check ensures not missing a match (for remainder) when going under 0 in loop below: count = cbPattern-4+1; while ( count > 0 && *(uint32_t *)(pbPattern+count-1) == *(uint32_t *)(&pbTarget[i]+(count-1)) ) count = count-4; // If we miss to hit then no need to compare the original: Needle if ( count <= 0 ) { // I have to add out-of-range checks... // i-(PRIMALposition-1) >= 0 // &pbTarget[i-(PRIMALposition-1)] <= pbTargetMax - 4 // i-(PRIMALposition-1)+(count-1) >= 0 // &pbTarget[i-(PRIMALposition-1)+(count-1)] <= pbTargetMax - 4 if ( (i-(PRIMALposition-1) >= 0) && (&pbTarget[i-(PRIMALposition-1)] <= pbTargetMax - 4) && (&pbTarget[i-(PRIMALposition-1)+(count-1)] <= pbTargetMax - 4) ) { if ( *(uint32_t *)&pbTarget[i-(PRIMALposition-1)] == *(uint32_t *)(pbPattern-(PRIMALposition-1))) { // This fast check ensures not missing a match (for remainder) when going under 0 in loop below: count = PRIMALlengthCANDIDATE-4+1; while ( count > 0 && *(uint32_t *)(pbPattern-(PRIMALposition-1)+count-1) == *(uint32_t *)(&pbTarget[i-(PRIMALposition-1)]+(count-1)) ) count = count-4; if ( count <= 0 ) return(pbTarget+i-(PRIMALposition-1)); } } } } } } else Gulliver = cbPattern-(2-1); i = i + Gulliver; //GlobalI++; // Comment it, it is only for stats. } return(NULL); } else { // if ( cbPattern<=NeedleThreshold2vs4swampLITE ) // BMH pseudo-order 4, needle should be >=8+2: ulHashPattern = *(uint32_t *)(pbPattern); // First four bytes for (i=0; i < 256*256; i++) {bm_Horspool_Order2[i]=0;} // In line below we "hash" 4bytes to 2bytes i.e. 16bit table, how to compute TOTAL number of BBs, 'cbPattern - Order + 1' is the number of BBs for text 'cbPattern' bytes long, for example, for cbPattern=11 'fastest fox' and Order=4 we have BBs = 11-4+1=8: //"fast" //"aste" //"stes" //"test" //"est " //"st f" //"t fo" //" fox" //for (i=0; i < cbPattern-4+1; i++) bm_Horspool_Order2[( *(unsigned short *)(pbPattern+i+0) + *(unsigned short *)(pbPattern+i+2) ) & ( (1<<16)-1 )]=1; //for (i=0; i < cbPattern-4+1; i++) bm_Horspool_Order2[( (*(uint32_t *)(pbPattern+i+0)>>16)+(*(uint32_t *)(pbPattern+i+0)&0xFFFF) ) & ( (1<<16)-1 )]=1; // Above line is replaced by next one with better hashing: for (i=0; i < cbPattern-4+1; i++) bm_Horspool_Order2[( (*(uint32_t *)(pbPattern+i+0)>>(16-1))+(*(uint32_t *)(pbPattern+i+0)&0xFFFF) ) & ( (1<<16)-1 )]=1; i=0; while (i <= cbTarget-cbPattern) { Gulliver = 1; //if ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2]>>16)+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2]&0xFFFF) ) & ( (1<<16)-1 )] != 0 ) { // DWORD #1 // Above line is replaced by next one with better hashing: if ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2]>>(16-1))+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2]&0xFFFF) ) & ( (1<<16)-1 )] != 0 ) { // DWORD #1 //if ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]>>16)+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]&0xFFFF) ) & ( (1<<16)-1 )] == 0 ) Gulliver = cbPattern-(2-1)-2-4; else { // Above line is replaced in order to strengthen the skip by checking the middle DWORD,if the two DWORDs are 'ab' and 'cd' i.e. [2x][2a][2b][2c][2d] then the middle DWORD is 'bc'. // The respective offsets (backwards) are: -10/-8/-6/-4 for 'xa'/'ab'/'bc'/'cd'. //if ( ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-6]>>16)+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-6]&0xFFFF) ) & ( (1<<16)-1 )] ) + ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]>>16)+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]&0xFFFF) ) & ( (1<<16)-1 )] ) + ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-2]>>16)+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-2]&0xFFFF) ) & ( (1<<16)-1 )] ) < 3 ) Gulliver = cbPattern-(2-1)-2-4-2; else { // Above line is replaced by next one with better hashing: // When using (16-1) right shifting instead of 16 we will have two different pairs (if they are equal), the highest bit being lost do the job especialy for ASCII texts with no symbols in range 128-255. // Example for genomesque pair TT+TT being shifted by (16-1): // T = 01010100 // TT = 01010100 01010100 // TTTT = 01010100 01010100 01010100 01010100 // TTTT>>16 = 00000000 00000000 01010100 01010100 // TTTT>>(16-1) = 00000000 00000000 10101000 10101000 <--- Due to the left shift by 1, the 8th bits of 1st and 2nd bytes are populated - usually they are 0 for English texts & 'ACGT' data. //if ( ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-6]>>(16-1))+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-6]&0xFFFF) ) & ( (1<<16)-1 )] ) + ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]>>(16-1))+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]&0xFFFF) ) & ( (1<<16)-1 )] ) + ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-2]>>(16-1))+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-2]&0xFFFF) ) & ( (1<<16)-1 )] ) < 3 ) Gulliver = cbPattern-(2-1)-2-4-2; else { // 'Maximus' uses branched 'if', again. if ( \ ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-6 +1]>>(16-1))+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-6 +1]&0xFFFF) ) & ( (1<<16)-1 )] ) == 0 \ || ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4 +1]>>(16-1))+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4 +1]&0xFFFF) ) & ( (1<<16)-1 )] ) == 0 \ ) Gulliver = cbPattern-(2-1)-2-4-2 +1; else { // Above line is not optimized (several a SHR are used), we have 5 non-overlapping WORDs, or 3 overlapping WORDs, within 4 overlapping DWORDs so: // [2x][2a][2b][2c][2d] // DWORD #4 // [2a] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-6]>>16) = !SHR to be avoided! <-- // [2x] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-6]&0xFFFF) = | // DWORD #3 | // [2b] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]>>16) = !SHR to be avoided! |<-- // [2a] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]&0xFFFF) = ------------------------ | // DWORD #2 | // [2c] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-2]>>16) = !SHR to be avoided! |<-- // [2b] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-2]&0xFFFF) = --------------------------- | // DWORD #1 | // [2d] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-0]>>16) = | // [2c] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-0]&0xFFFF) = ------------------------------ // // So in order to remove 3 SHR instructions the equal extractions are: // DWORD #4 // [2a] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]&0xFFFF) = !SHR to be avoided! <-- // [2x] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-6]&0xFFFF) = | // DWORD #3 | // [2b] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-2]&0xFFFF) = !SHR to be avoided! |<-- // [2a] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]&0xFFFF) = ------------------------ | // DWORD #2 | // [2c] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-0]&0xFFFF) = !SHR to be avoided! |<-- // [2b] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-2]&0xFFFF) = --------------------------- | // DWORD #1 | // [2d] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-0]>>16) = | // [2c] (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-0]&0xFFFF) = ------------------------------ //if ( ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]&0xFFFF)+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-6]&0xFFFF) ) & ( (1<<16)-1 )] ) + ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-2]&0xFFFF)+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]&0xFFFF) ) & ( (1<<16)-1 )] ) + ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-0]&0xFFFF)+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-2]&0xFFFF) ) & ( (1<<16)-1 )] ) < 3 ) Gulliver = cbPattern-(2-1)-2-6; else { // Since the above Decumanus mumbo-jumbo (3 overlapping lookups vs 2 non-overlapping lookups) is not fast enough we go DuoDecumanus or 3x4: // [2y][2x][2a][2b][2c][2d] // DWORD #3 // DWORD #2 // DWORD #1 //if ( ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]>>16)+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-4]&0xFFFF) ) & ( (1<<16)-1 )] ) + ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-8]>>16)+(*(uint32_t *)&pbTarget[i+cbPattern-1-1-2-8]&0xFFFF) ) & ( (1<<16)-1 )] ) < 2 ) Gulliver = cbPattern-(2-1)-2-8; else { if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) { // Order 4 [ // Let's try something "outrageous" like comparing with[out] overlap BBs 4bytes long instead of 1 byte back-to-back: // Inhere we are using order 4, 'cbPattern - Order + 1' is the number of BBs for text 'cbPattern' bytes long, for example, for cbPattern=11 'fastest fox' and Order=4 we have BBs = 11-4+1=8: //0:"fast" if the comparison failed here, 'count' is 1; 'Gulliver' is cbPattern-(4-1)-7 //1:"aste" if the comparison failed here, 'count' is 2; 'Gulliver' is cbPattern-(4-1)-6 //2:"stes" if the comparison failed here, 'count' is 3; 'Gulliver' is cbPattern-(4-1)-5 //3:"test" if the comparison failed here, 'count' is 4; 'Gulliver' is cbPattern-(4-1)-4 //4:"est " if the comparison failed here, 'count' is 5; 'Gulliver' is cbPattern-(4-1)-3 //5:"st f" if the comparison failed here, 'count' is 6; 'Gulliver' is cbPattern-(4-1)-2 //6:"t fo" if the comparison failed here, 'count' is 7; 'Gulliver' is cbPattern-(4-1)-1 //7:" fox" if the comparison failed here, 'count' is 8; 'Gulliver' is cbPattern-(4-1) count = cbPattern-4+1; // Below comparison is UNIdirectional: while ( count > 0 && *(uint32_t *)(pbPattern+count-1) == *(uint32_t *)(&pbTarget[i]+(count-1)) ) count = count-4; // count = cbPattern-4+1 = 23-4+1 = 20 // boomshakalakaZZZZZZ[ZZZZ] 20 // boomshakalakaZZ[ZZZZ]ZZZZ 20-4 // boomshakala[kaZZ]ZZZZZZZZ 20-8 = 12 // boomsha[kala]kaZZZZZZZZZZ 20-12 = 8 // boo[msha]kalakaZZZZZZZZZZ 20-16 = 4 // If we miss to hit then no need to compare the original: Needle if ( count <= 0 ) { // I have to add out-of-range checks... // i-(PRIMALposition-1) >= 0 // &pbTarget[i-(PRIMALposition-1)] <= pbTargetMax - 4 // i-(PRIMALposition-1)+(count-1) >= 0 // &pbTarget[i-(PRIMALposition-1)+(count-1)] <= pbTargetMax - 4 if ( (i-(PRIMALposition-1) >= 0) && (&pbTarget[i-(PRIMALposition-1)] <= pbTargetMax - 4) && (&pbTarget[i-(PRIMALposition-1)+(count-1)] <= pbTargetMax - 4) ) { if ( *(uint32_t *)&pbTarget[i-(PRIMALposition-1)] == *(uint32_t *)(pbPattern-(PRIMALposition-1))) { // This fast check ensures not missing a match (for remainder) when going under 0 in loop below: count = PRIMALlengthCANDIDATE-4+1; while ( count > 0 && *(uint32_t *)(pbPattern-(PRIMALposition-1)+count-1) == *(uint32_t *)(&pbTarget[i-(PRIMALposition-1)]+(count-1)) ) count = count-4; if ( count <= 0 ) return(pbTarget+i-(PRIMALposition-1)); } } } // In order to avoid only-left or only-right WCS the memcmp should be done as left-to-right and right-to-left AT THE SAME TIME. // Below comparison is BIdirectional. It pays off when needle is 8+++ long: // for (count = cbPattern-4+1; count > 0; count = count-4) { // if ( *(uint32_t *)(pbPattern+count-1) != *(uint32_t *)(&pbTarget[i]+(count-1)) ) {break;}; // if ( *(uint32_t *)(pbPattern+(cbPattern-4+1)-count) != *(uint32_t *)(&pbTarget[i]+(cbPattern-4+1)-count) ) {count = (cbPattern-4+1)-count +(1); break;} // +(1) because two lookups are implemented as one, also no danger of 'count' being 0 because of the fast check outwith the 'while': if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) // } // if ( count <= 0 ) return(pbTarget+i); // Checking the order 2 pairs in mismatched DWORD, all the 3: //if ( bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+count-1]] == 0 ) Gulliver = count; // 1 or bigger, as it should //if ( bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+count-1+1]] == 0 ) Gulliver = count+1; // 1 or bigger, as it should //if ( bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+count-1+1+1]] == 0 ) Gulliver = count+1+1; // 1 or bigger, as it should // if ( bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+count-1]] + bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+count-1+1]] + bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+count-1+1+1]] < 3 ) Gulliver = count; // 1 or bigger, as it should, THE MIN(count,count+1,count+1+1) // Above compound 'if' guarantees not that Gulliver > 1, an example: // Needle: fastest tax // Window: ...fastast tax... // After matching ' tax' vs ' tax' and 'fast' vs 'fast' the mismathced DWORD is 'test' vs 'tast': // 'tast' when factorized down to order 2 yields: 'ta','as','st' - all the three when summed give 1+1+1=3 i.e. Gulliver remains 1. // Roughly speaking, this attempt maybe has its place in worst-case scenarios but not in English text and even not in ACGT data, that's why I commented it in original 'Shockeroo'. //if ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+count-1]>>16)+(*(uint32_t *)&pbTarget[i+count-1]&0xFFFF) ) & ( (1<<16)-1 )] == 0 ) Gulliver = count; // 1 or bigger, as it should // Above line is replaced by next one with better hashing: // if ( bm_Horspool_Order2[( (*(uint32_t *)&pbTarget[i+count-1]>>(16-1))+(*(uint32_t *)&pbTarget[i+count-1]&0xFFFF) ) & ( (1<<16)-1 )] == 0 ) Gulliver = count; // 1 or bigger, as it should // Order 4 ] } } } else Gulliver = cbPattern-(2-1)-2; // -2 because we check the 4 rightmost bytes not 2. i = i + Gulliver; //GlobalI++; // Comment it, it is only for stats. } return(NULL); } // if ( cbPattern<=NeedleThreshold2vs4swampLITE ) } // if ( cbPattern<=NeedleThreshold2vs4swampLITE ) } //if ( cbPattern<4 ) } // Last change: 2014-Apr-14 // If you want to help me to improve it, email me at: sanmayce@sanmayce.com // Enfun! |
Changes to src/simple_codecs.cpp.
︙ | ︙ | |||
221 222 223 224 225 226 227 | char key[16] = {0}; uhash_ctx_t ctx = uhash_alloc(key); // FIXME: what if malloc fails? ::uhash(ctx, in, isize, out); uhash_free(ctx); memcpy(in + isize, backup, sizeof(backup)); } | | | 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 | char key[16] = {0}; uhash_ctx_t ctx = uhash_alloc(key); // FIXME: what if malloc fails? ::uhash(ctx, in, isize, out); uhash_free(ctx); memcpy(in + isize, backup, sizeof(backup)); } // TODO: uhash doesn't support blocks > 16 MB void umac(char * in, size_t isize, char * out) { // uhash may overwrite data after the input // That's where fsbench stores checksum // If this function is called during 'decoding', // there's a hash stored already that will be destroyed, |
︙ | ︙ | |||
647 648 649 650 651 652 653 654 655 656 657 658 659 660 | MurmurHash3_x86_128(in, isize, 0, out); } void murmur_x64_128(char * in, size_t isize, char * out) { MurmurHash3_x64_128(in, isize, 0, out); } #endif//FSBENCH_USE_MURMUR #ifdef FSBENCH_USE_QUICKLZZIP extern "C" { #include "quicklzzip.h" } size_t qlzzip_c(char * in, size_t isize, char * out, size_t, void *) | > > > > > > > > > > > > > > > > | 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 | MurmurHash3_x86_128(in, isize, 0, out); } void murmur_x64_128(char * in, size_t isize, char * out) { MurmurHash3_x64_128(in, isize, 0, out); } #endif//FSBENCH_USE_MURMUR #ifdef FSBENCH_USE_NAKAMICHI extern "C" { unsigned int Compress(char* ret, char* src, unsigned int srcSize); unsigned int Decompress(char* ret, char* src, unsigned int srcSize); } size_t nakamichi_c(char * in, size_t isize, char * out, size_t, void *) { return Compress(out, in, isize); } size_t nakamichi_d(char * in, size_t isize, char * out, size_t osize, void *) { return Decompress(out, in, isize); } #endif//FSBENCH_USE_NAKAMICHI #ifdef FSBENCH_USE_QUICKLZZIP extern "C" { #include "quicklzzip.h" } size_t qlzzip_c(char * in, size_t isize, char * out, size_t, void *) |
︙ | ︙ |
Changes to src/simple_codecs.hpp.
︙ | ︙ | |||
124 125 126 127 128 129 130 131 132 133 134 135 136 137 | size_t mmini_lzl_d(char * in, size_t isize, char * out, size_t osize, void * _); #endif// FSBENCH_USE_MMINI #ifdef FSBENCH_USE_MURMUR void murmur_x86_32(char * in, size_t isize, char * out); void murmur_x86_128(char * in, size_t isize, char * out); void murmur_x64_128(char * in, size_t isize, char * out); #endif//FSBENCH_USE_MURMUR #ifdef FSBENCH_USE_QUICKLZZIP size_t qlzzip_c(char * in, size_t isize, char * out, size_t osize, void * _); #endif//FSBENCH_USE_QUICKLZZIP #ifdef FSBENCH_USE_RLE64 size_t RLE64_c(char * in, size_t isize, char * out, size_t osize, void * _); size_t RLE64_d(char * in, size_t isize, char * out, size_t osize, void * _); size_t RLE32_c(char * in, size_t isize, char * out, size_t osize, void * _); | > > > > | 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | size_t mmini_lzl_d(char * in, size_t isize, char * out, size_t osize, void * _); #endif// FSBENCH_USE_MMINI #ifdef FSBENCH_USE_MURMUR void murmur_x86_32(char * in, size_t isize, char * out); void murmur_x86_128(char * in, size_t isize, char * out); void murmur_x64_128(char * in, size_t isize, char * out); #endif//FSBENCH_USE_MURMUR #ifdef FSBENCH_USE_NAKAMICHI size_t nakamichi_c(char * in, size_t isize, char * out, size_t, void *); size_t nakamichi_d(char * in, size_t isize, char * out, size_t osize, void *); #endif//FSBENCH_USE_NAKAMICHI #ifdef FSBENCH_USE_QUICKLZZIP size_t qlzzip_c(char * in, size_t isize, char * out, size_t osize, void * _); #endif//FSBENCH_USE_QUICKLZZIP #ifdef FSBENCH_USE_RLE64 size_t RLE64_c(char * in, size_t isize, char * out, size_t osize, void * _); size_t RLE64_d(char * in, size_t isize, char * out, size_t osize, void * _); size_t RLE32_c(char * in, size_t isize, char * out, size_t osize, void * _); |
︙ | ︙ |