/*
 * Copyright © 2012  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_SET_DIGEST_HH
#define HB_SET_DIGEST_HH

#include "hb.hh"
#include "hb-machinery.hh"

/*
 * The set-digests implement "filters" that support "approximate
 * member query".  Conceptually these are like Bloom Filters and
 * Quotient Filters; however, they are much smaller, faster, and
 * designed to fit the requirements of our uses for glyph coverage
 * queries.
 *
 * Our filters are highly accurate if the lookup covers a fairly local
 * set of glyphs, but fully flooded and ineffective if the coverage is
 * all over the place.
 *
 * The way these are used is that the filter is first populated by
 * a lookup's or subtable's Coverage table(s), and then when we
 * want to apply the lookup or subtable to a glyph, before trying
 * to apply, we ask the filter if the glyph may be covered.  If it's
 * not, we return early.  We can also match a digest against another
 * digest.
 *
 * We use these filters at three levels:
 *
 *   - If the digest for all the glyphs in the buffer as a whole
 *     does not match the digest for the lookup, skip the lookup.
 *
 *   - For each glyph, if it doesn't match the lookup digest,
 *     skip it.
 *
 *   - For each glyph, if it doesn't match the subtable digest,
 *     skip it.
 *
 * The main filter we use is a combination of three bits-pattern
 * filters.  A bits-pattern filter checks a number of bits (5 or 6)
 * of the input number (the glyph-id in this case) and checks whether
 * its pattern is amongst the patterns of any of the accepted values.
 * The accepted patterns are represented as a "long" integer.  The
 * check is done using four bitwise operations only.
 */
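
/*
 * A minimal usage sketch (illustrative only; not part of this header's
 * contents upstream): populate a digest with the glyph ids a lookup
 * covers, then consult it before doing the real, more expensive coverage
 * check.  A negative answer is exact; a positive answer may be a false
 * positive.
 *
 *   hb_set_digest_t digest;
 *   digest.init ();
 *   digest.add (10u);              // a single glyph id
 *   digest.add_range (50u, 60u);   // a contiguous range of glyph ids
 *
 *   if (!digest.may_have (1000u))
 *     ;  // definitely not covered; skip the lookup
 *   else
 *     ;  // possibly covered; fall through to the real lookup
 */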

static constexpr unsigned hb_set_digest_shifts[] = {4, 0, 6};

struct hb_set_digest_t
{
  // No science in these. Intuition and testing only.
  using mask_t = uint64_t;

  static constexpr unsigned n = ARRAY_LENGTH_CONST (hb_set_digest_shifts);
  static constexpr unsigned mask_bytes = sizeof (mask_t);
  static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
  static constexpr hb_codepoint_t mb1 = mask_bits - 1;
  static constexpr mask_t one = 1;
  static constexpr mask_t all = (mask_t) -1;

  void init ()
  { for (unsigned i = 0; i < n; i++) masks[i] = 0; }

  void clear () { init (); }

  static hb_set_digest_t full ()
  {
    hb_set_digest_t d;
    for (unsigned i = 0; i < n; i++) d.masks[i] = all;
    return d;
  }

  void union_ (const hb_set_digest_t &o)
  { for (unsigned i = 0; i < n; i++) masks[i] |= o.masks[i]; }

  bool add_range (hb_codepoint_t a, hb_codepoint_t b)
  {
    bool ret;

    // If every mask is already saturated, there is nothing left to add.
    ret = false;
    for (unsigned i = 0; i < n; i++)
      if (masks[i] != all)
        ret = true;
    if (!ret) return false;

    ret = false;
    for (unsigned i = 0; i < n; i++)
    {
      mask_t shift = hb_set_digest_shifts[i];
      if ((b >> shift) - (a >> shift) >= mb1)
        masks[i] = all;
      else
      {
        // Set every bit position between a's and b's patterns, inclusive,
        // wrapping around the mask width when b's pattern precedes a's.
        mask_t ma = one << ((a >> shift) & mb1);
        mask_t mb = one << ((b >> shift) & mb1);
        masks[i] |= mb + (mb - ma) - (mb < ma);
        ret = true;
      }
    }

    return ret;
  }

  template <typename T>
  void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  {
    for (unsigned int i = 0; i < count; i++)
    {
      add (*array);
      array = &StructAtOffsetUnaligned<T> ((const void *) array, stride);
    }
  }
  template <typename T>
  void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
  template <typename T>
  bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  {
    add_array (array, count, stride);
    return true;
  }
  template <typename T>
  bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }

  bool operator [] (hb_codepoint_t g) const
  { return may_have (g); }

  void add (hb_codepoint_t g)
  {
    for (unsigned i = 0; i < n; i++)
      masks[i] |= one << ((g >> hb_set_digest_shifts[i]) & mb1);
  }

  HB_ALWAYS_INLINE
  bool may_have (hb_codepoint_t g) const
  {
    for (unsigned i = 0; i < n; i++)
      if (!(masks[i] & (one << ((g >> hb_set_digest_shifts[i]) & mb1))))
        return false;
    return true;
  }

  bool may_intersect (const hb_set_digest_t &o) const
  {
    for (unsigned i = 0; i < n; i++)
      if (!(masks[i] & o.masks[i]))
        return false;
    return true;
  }

  private:

  mask_t masks[n] = {};
};

#endif /* HB_SET_DIGEST_HH */