dependencies

org.clojure/clojure

1.6.0

(this space intentionally left almost blank)

namespaces

sparse.core
sparse.utils

sparse.core

(ns sparse.core
  (require [sparse.utils :refer :all]))

Turns a value within the range zero to range (inclusive) into a sparse string of 1s and 0s that has length size and bits number of bits set.

Throws assertion error for various situations:

Asking for a zero size sparse array.
Trying to set more bits than there are in the sparse array.
Asking for a range that is less than or equal to zero.
Asking for a value that is not from zero to the allowed range.

(defn long->sparse
  [^long size ^long bits ^long value ^long range]
  (assert (pos? size) "Size of sparse array must be greater than zero.")
  (assert (<= bits size) "Number of bits to set in sparse array must be less than or equal to the size of the sparse array.")
  (assert (>= range 1) (str "Range must be greater than zero"))
  (assert (and (>= value 0) (<= value range)) "Value must be between zero and range inclusive.")
  (let [sparse-block-size (Math/floor (/ size bits))
        sparse-starting-block-size (- size (* sparse-block-size bits))
        ranges (long->bit-ranges range bits)
        ranges-bit-count (reduce + ranges)
        value-bits (long->bitstring value ranges-bit-count)
        value-blocks (split-bitstring value-bits ranges)
        sparse-blocks (map #(bitstring->single-bit-bitstring sparse-block-size %) value-blocks)]
    (str (zeros sparse-starting-block-size) (reduce str "" sparse-blocks))))

(defn sparse->long
  [sparse ^long range]
  (assert (>= range 1) "Range must be greater than zero")
  (assert (valid-sparse? sparse) "That's not a sparse sequence")
  (let [size (count sparse)
        bits (reduce + sparse)
        sparse-block-size (Math/floor (/ size bits))
        sparse-starting-block-size (- size (* sparse-block-size bits))
        ranges (long->bit-ranges range bits)]))

sparse.utils

toc

(ns sparse.utils)

Returns the bit to set in an array of bits of length size which represents the value val's position in the range zero to range.

For example if the number of bits is 10 and the range is 99, the value 0 has the bit position 0, representing the bit array 0000000001.

If the value is 99 the bit position is 9, representing 1000000000.

If the value is 33, the bit position is 3, representing 0000001000..

Of course if the number range is large and the bit array is small, many numbers will be represented by each bit position. If the number range is the same as the number of bits, e.g. the number of bits is 100 and the number range is (zero to) 99, then one bit represents each number - bit 13 represents the number 13, and bit 0 represents the number zero.

If the number range is less than the number of bits, the value is arrived at by multiplying range and value by size / range, making the range the same as the number of bits and increasing the value so it fits across the whole range.

Throws AssertionError if value is outside the range, or value < 0, or range <= 0

(defn bit-to-set
  [^long size val range]
  (assert (<= val range) "Value must be <= the range.")
  (assert (>= val 0) "Value must be >= zero.")
  (assert (pos? range) "Range must be > zero.")
  (if (>= range size)
    (let [max-bit (dec size)
          result (->
                  (/ val range)
                  (* size)
                  (Math/floor)
                  (int))]
      (if (> result max-bit) ; at limit result will be too high.
        max-bit
        result))
    (bit-to-set size (* val (/ size range)) size)))

(defn zeros
  [n]
  (reduce str  (repeat n "0")))

Returns a length size bitstring with a single bit set that represents the position of the number value in the range (zreo to range inclusive).

(defn num->single-bit-bitstring
  [size val range]
  (let [bit-to-set (bit-to-set size val range)]
    (reduce (fn [s x] (str s (if (= x bit-to-set) "1" "0"))) "" (reverse (clojure.core/range 0 size)))))

Take bitstring, transform it to a number, work out the maximum long value of a bit string of its size and then calculate the single-bit bitstring of length size that represents the number.

(defn bitstring->single-bit-bitstring
  [size bitstring]
  (let [val (Long/parseLong bitstring 2)
        range (Long/parseLong (reduce str (repeat (count bitstring) "1")) 2)]
    (num->single-bit-bitstring size val range)))

(defn bits->binarystring [size bits] (reduce (fn [s x] (str s (if (contains? bits x) "1" "0"))) "" (reverse (range size))))

Turn a long value 'l' into a string of bits. In the second form, the number of bits 'b' can be specified with zero padding occurring where necessary. The number of bits 'b' must be sufficient to fit the number.

(defn long->bitstring
  ([^long l]
     (Long/toBinaryString l))
  ([^long l ^long b]
     (let [bits (long->bitstring l)
           prefix-bit-count (- b (count bits))]
       (assert (>= prefix-bit-count 0) "Number of bits must be enough to contain the value.")
       (str (apply str (take prefix-bit-count (repeat "0")))
            bits))))

The number of bits required to represent the long value.

(defn- long->bit-count
  [^long l]
  (count (Long/toBinaryString l)))

Look at how many bits are required to represent the value 'l' in binary and returns a sequence of numbers specifying the size of 'n' bit blocks into which the number's bits can be split. Because the number can't always be split into bit blocks of the same size, any additional bits are added to the initial blocks.

For example, the number 127 can be represented with 7 bits. If there are two blocks, the first with have 4 bits and the second with have 3.

(long->bitranges 127, 2) => (4, 3)

If the number of bits required to represent the number is lower than 'n' the number of blocks requested, the ranges returned will be all of one bit size and there will be 'n' of them.

(defn long->bit-ranges
  [^long l ^long n]
  (assert (pos? n) "The number of blocks must be greater than zero.")
  (let [range-bit-count (long->bit-count l)
        range-block-size (long (Math/floor (/ range-bit-count n)))
        remainder (- range-bit-count (* range-block-size n))]
    (if (< range-bit-count n)
      (take n (repeat 1))
      (map +
           (take n (repeat range-block-size))
           (concat (take remainder (repeat 1))
                   (take (- n remainder) (repeat 0)))))))

Function supports the splitting of a string into smaller strings. 'v' is a sequence of one or more strings where the last string is to be split into two strings, the first of length 'n'. The result returned is a sequence idential to the original sequence but with the last item split into two parts.

(defn- split-bitstring-fn
  [v n]
  (let [starters (take (dec (count v)) v)
        last (last v)
        last-1 (subs last 0 n)
        last-2 (subs last n)]
    (flatten (conj '() last-2 last-1 starters))))

Split the bitstring 's' into a sequence of bitstrings of sizes defined in ranges 'r'.

(defn split-bitstring
  [s r]
  (assert (== (count s) (reduce + r)) "The total size of the sub-bitstrings must be the same as the size of the initial bitstring")
  (assert (reduce #(and %1 (>= %2 1)) true r) "Sub-bitstrings must be of size 1 or above.")
  (assert (not= 0 (count s)) "There must be a non-empty bitstring to split.")
  (reduce split-bitstring-fn [s] (drop-last r)))

Check that a sparse sequence is valid, i.e. only contains 1s and 0s or is empty.

(defn valid-sparse?
  [sparse]
  (not (some #(not (or (= 1 %) (= 0 %))) sparse)))

Go from a sparse bit sequence back to the original number (or a possible range as the answer often won't be precise).

Turn the position of a bit in a particular size sequence into the original value, or more precisely into three values: the highest possible value that could be represented by this bit position, the mid-value and the lowest possible value.

Suppose that an 8 bit sparse array represents a value in the range zero to 100,000. Fairly obviously, one bit represents a large range of values. In this case, the sparse sequence (1 0 0 0 0 0 0 0) can have been generated from numbers from 100,000 down to 87,500 and the mid-point value is 93,750.

So, sparse arrays that a small relative to the number that they encode and rather imprecise. However, sparse arrays that are large relative to the number that they encode can be much more precise.

(defn bit-pos->num
  [^long size ^long pos ^long range]
  (let [top (int (Math/floor (/ (* range pos) size)))
        bottom (int (Math/ceil (/ (* range (dec pos)) size)))
        mid (int (Math/floor (/ (+ top bottom) 2)))]
    [top mid bottom]))

(defn index-of-first-one
  [bit-seq]
  (first (keep-indexed #(when (= 1 %2) %1) bit-seq)))

(defn set-bit-pos
  [bit-seq]
  (- (count bit-seq) (index-of-first-one bit-seq)))

Turns a bit sequence with a single bit set back into a value within the specified range

(defn single-bit-in-seq->num
  [bit-seq ^long range]
  (second (bit-pos->num (count bit-seq) (set-bit-pos bit-seq) range)))

(defn long->bit-seq
  ([^long l]
     (map #(- (long %) 48) (Long/toBinaryString l)))
  ([^long l ^long b]
     (let [bit-seq (long->bit-seq l)
           prefix-bit-count (- b (count bit-seq))]
       (assert (>= prefix-bit-count 0) "Number of bits must be enough to contain the value.")
       (flatten (conj bit-seq
                      (take prefix-bit-count (repeat 0)))))))

sparse

0.1.2-SNAPSHOT

dependencies

namespaces

sparse.core

sparse.utils