mirror of
https://codeberg.org/forgejo/forgejo
synced 2024-11-25 03:06:10 +01:00
894 lines
23 KiB
Go
894 lines
23 KiB
Go
package roaring
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
|
|
snappy "github.com/glycerine/go-unsnap-stream"
|
|
"github.com/tinylib/msgp/msgp"
|
|
)
|
|
|
|
//go:generate msgp -unexported
|
|
|
|
type container interface {
|
|
clone() container
|
|
and(container) container
|
|
andCardinality(container) int
|
|
iand(container) container // i stands for inplace
|
|
andNot(container) container
|
|
iandNot(container) container // i stands for inplace
|
|
getCardinality() int
|
|
// rank returns the number of integers that are
|
|
// smaller or equal to x. rank(infinity) would be getCardinality().
|
|
rank(uint16) int
|
|
|
|
iadd(x uint16) bool // inplace, returns true if x was new.
|
|
iaddReturnMinimized(uint16) container // may change return type to minimize storage.
|
|
|
|
//addRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
|
|
iaddRange(start, endx int) container // i stands for inplace, range is [firstOfRange,endx)
|
|
|
|
iremove(x uint16) bool // inplace, returns true if x was present.
|
|
iremoveReturnMinimized(uint16) container // may change return type to minimize storage.
|
|
|
|
not(start, final int) container // range is [firstOfRange,lastOfRange)
|
|
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
|
|
xor(r container) container
|
|
getShortIterator() shortIterable
|
|
getManyIterator() manyIterable
|
|
contains(i uint16) bool
|
|
maximum() uint16
|
|
minimum() uint16
|
|
|
|
// equals is now logical equals; it does not require the
|
|
// same underlying container types, but compares across
|
|
// any of the implementations.
|
|
equals(r container) bool
|
|
|
|
fillLeastSignificant16bits(array []uint32, i int, mask uint32)
|
|
or(r container) container
|
|
orCardinality(r container) int
|
|
isFull() bool
|
|
ior(r container) container // i stands for inplace
|
|
intersects(r container) bool // whether the two containers intersect
|
|
lazyOR(r container) container
|
|
lazyIOR(r container) container
|
|
getSizeInBytes() int
|
|
//removeRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
|
|
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
|
|
selectInt(x uint16) int // selectInt returns the xth integer in the container
|
|
serializedSizeInBytes() int
|
|
readFrom(io.Reader) (int, error)
|
|
writeTo(io.Writer) (int, error)
|
|
|
|
numberOfRuns() int
|
|
toEfficientContainer() container
|
|
String() string
|
|
containerType() contype
|
|
}
|
|
|
|
type contype uint8
|
|
|
|
const (
|
|
bitmapContype contype = iota
|
|
arrayContype
|
|
run16Contype
|
|
run32Contype
|
|
)
|
|
|
|
// careful: range is [firstOfRange,lastOfRange]
|
|
func rangeOfOnes(start, last int) container {
|
|
if start > MaxUint16 {
|
|
panic("rangeOfOnes called with start > MaxUint16")
|
|
}
|
|
if last > MaxUint16 {
|
|
panic("rangeOfOnes called with last > MaxUint16")
|
|
}
|
|
if start < 0 {
|
|
panic("rangeOfOnes called with start < 0")
|
|
}
|
|
if last < 0 {
|
|
panic("rangeOfOnes called with last < 0")
|
|
}
|
|
return newRunContainer16Range(uint16(start), uint16(last))
|
|
}
|
|
|
|
type roaringArray struct {
|
|
keys []uint16
|
|
containers []container `msg:"-"` // don't try to serialize directly.
|
|
needCopyOnWrite []bool
|
|
copyOnWrite bool
|
|
|
|
// conserz is used at serialization time
|
|
// to serialize containers. Otherwise empty.
|
|
conserz []containerSerz
|
|
}
|
|
|
|
// containerSerz facilitates serializing container (tricky to
|
|
// serialize because it is an interface) by providing a
|
|
// light wrapper with a type identifier.
|
|
type containerSerz struct {
|
|
t contype `msg:"t"` // type
|
|
r msgp.Raw `msg:"r"` // Raw msgpack of the actual container type
|
|
}
|
|
|
|
func newRoaringArray() *roaringArray {
|
|
return &roaringArray{}
|
|
}
|
|
|
|
// runOptimize compresses the element containers to minimize space consumed.
|
|
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
|
|
// A: since we aren't changing the logical content, just the representation,
|
|
// we don't bother to check the needCopyOnWrite bits. We replace
|
|
// (possibly all) elements of ra.containers in-place with space
|
|
// optimized versions.
|
|
func (ra *roaringArray) runOptimize() {
|
|
for i := range ra.containers {
|
|
ra.containers[i] = ra.containers[i].toEfficientContainer()
|
|
}
|
|
}
|
|
|
|
func (ra *roaringArray) appendContainer(key uint16, value container, mustCopyOnWrite bool) {
|
|
ra.keys = append(ra.keys, key)
|
|
ra.containers = append(ra.containers, value)
|
|
ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite)
|
|
}
|
|
|
|
func (ra *roaringArray) appendWithoutCopy(sa roaringArray, startingindex int) {
|
|
mustCopyOnWrite := sa.needCopyOnWrite[startingindex]
|
|
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite)
|
|
}
|
|
|
|
func (ra *roaringArray) appendCopy(sa roaringArray, startingindex int) {
|
|
// cow only if the two request it, or if we already have a lightweight copy
|
|
copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex)
|
|
if !copyonwrite {
|
|
// since there is no copy-on-write, we need to clone the container (this is important)
|
|
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].clone(), copyonwrite)
|
|
} else {
|
|
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], copyonwrite)
|
|
if !sa.needsCopyOnWrite(startingindex) {
|
|
sa.setNeedsCopyOnWrite(startingindex)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (ra *roaringArray) appendWithoutCopyMany(sa roaringArray, startingindex, end int) {
|
|
for i := startingindex; i < end; i++ {
|
|
ra.appendWithoutCopy(sa, i)
|
|
}
|
|
}
|
|
|
|
func (ra *roaringArray) appendCopyMany(sa roaringArray, startingindex, end int) {
|
|
for i := startingindex; i < end; i++ {
|
|
ra.appendCopy(sa, i)
|
|
}
|
|
}
|
|
|
|
func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) {
|
|
// cow only if the two request it, or if we already have a lightweight copy
|
|
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
|
|
|
|
for i := 0; i < sa.size(); i++ {
|
|
if sa.keys[i] >= stoppingKey {
|
|
break
|
|
}
|
|
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
|
|
if thiscopyonewrite {
|
|
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
|
|
if !sa.needsCopyOnWrite(i) {
|
|
sa.setNeedsCopyOnWrite(i)
|
|
}
|
|
|
|
} else {
|
|
// since there is no copy-on-write, we need to clone the container (this is important)
|
|
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) {
|
|
// cow only if the two request it, or if we already have a lightweight copy
|
|
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
|
|
|
|
startLocation := sa.getIndex(beforeStart)
|
|
if startLocation >= 0 {
|
|
startLocation++
|
|
} else {
|
|
startLocation = -startLocation - 1
|
|
}
|
|
|
|
for i := startLocation; i < sa.size(); i++ {
|
|
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
|
|
if thiscopyonewrite {
|
|
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
|
|
if !sa.needsCopyOnWrite(i) {
|
|
sa.setNeedsCopyOnWrite(i)
|
|
}
|
|
} else {
|
|
// since there is no copy-on-write, we need to clone the container (this is important)
|
|
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
func (ra *roaringArray) removeIndexRange(begin, end int) {
|
|
if end <= begin {
|
|
return
|
|
}
|
|
|
|
r := end - begin
|
|
|
|
copy(ra.keys[begin:], ra.keys[end:])
|
|
copy(ra.containers[begin:], ra.containers[end:])
|
|
copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:])
|
|
|
|
ra.resize(len(ra.keys) - r)
|
|
}
|
|
|
|
func (ra *roaringArray) resize(newsize int) {
|
|
for k := newsize; k < len(ra.containers); k++ {
|
|
ra.containers[k] = nil
|
|
}
|
|
|
|
ra.keys = ra.keys[:newsize]
|
|
ra.containers = ra.containers[:newsize]
|
|
ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize]
|
|
}
|
|
|
|
func (ra *roaringArray) clear() {
|
|
ra.resize(0)
|
|
ra.copyOnWrite = false
|
|
ra.conserz = nil
|
|
}
|
|
|
|
func (ra *roaringArray) clone() *roaringArray {
|
|
|
|
sa := roaringArray{}
|
|
sa.copyOnWrite = ra.copyOnWrite
|
|
|
|
// this is where copyOnWrite is used.
|
|
if ra.copyOnWrite {
|
|
sa.keys = make([]uint16, len(ra.keys))
|
|
copy(sa.keys, ra.keys)
|
|
sa.containers = make([]container, len(ra.containers))
|
|
copy(sa.containers, ra.containers)
|
|
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
|
|
|
|
ra.markAllAsNeedingCopyOnWrite()
|
|
sa.markAllAsNeedingCopyOnWrite()
|
|
|
|
// sa.needCopyOnWrite is shared
|
|
} else {
|
|
// make a full copy
|
|
|
|
sa.keys = make([]uint16, len(ra.keys))
|
|
copy(sa.keys, ra.keys)
|
|
|
|
sa.containers = make([]container, len(ra.containers))
|
|
for i := range sa.containers {
|
|
sa.containers[i] = ra.containers[i].clone()
|
|
}
|
|
|
|
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
|
|
}
|
|
return &sa
|
|
}
|
|
|
|
// unused function:
|
|
//func (ra *roaringArray) containsKey(x uint16) bool {
|
|
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
|
|
//}
|
|
|
|
func (ra *roaringArray) getContainer(x uint16) container {
|
|
i := ra.binarySearch(0, int64(len(ra.keys)), x)
|
|
if i < 0 {
|
|
return nil
|
|
}
|
|
return ra.containers[i]
|
|
}
|
|
|
|
func (ra *roaringArray) getContainerAtIndex(i int) container {
|
|
return ra.containers[i]
|
|
}
|
|
|
|
func (ra *roaringArray) getFastContainerAtIndex(i int, needsWriteable bool) container {
|
|
c := ra.getContainerAtIndex(i)
|
|
switch t := c.(type) {
|
|
case *arrayContainer:
|
|
c = t.toBitmapContainer()
|
|
case *runContainer16:
|
|
if !t.isFull() {
|
|
c = t.toBitmapContainer()
|
|
}
|
|
case *bitmapContainer:
|
|
if needsWriteable && ra.needCopyOnWrite[i] {
|
|
c = ra.containers[i].clone()
|
|
}
|
|
}
|
|
return c
|
|
}
|
|
|
|
func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
|
|
if ra.needCopyOnWrite[i] {
|
|
ra.containers[i] = ra.containers[i].clone()
|
|
ra.needCopyOnWrite[i] = false
|
|
}
|
|
return ra.containers[i]
|
|
}
|
|
|
|
func (ra *roaringArray) getIndex(x uint16) int {
|
|
// before the binary search, we optimize for frequent cases
|
|
size := len(ra.keys)
|
|
if (size == 0) || (ra.keys[size-1] == x) {
|
|
return size - 1
|
|
}
|
|
return ra.binarySearch(0, int64(size), x)
|
|
}
|
|
|
|
func (ra *roaringArray) getKeyAtIndex(i int) uint16 {
|
|
return ra.keys[i]
|
|
}
|
|
|
|
func (ra *roaringArray) insertNewKeyValueAt(i int, key uint16, value container) {
|
|
ra.keys = append(ra.keys, 0)
|
|
ra.containers = append(ra.containers, nil)
|
|
|
|
copy(ra.keys[i+1:], ra.keys[i:])
|
|
copy(ra.containers[i+1:], ra.containers[i:])
|
|
|
|
ra.keys[i] = key
|
|
ra.containers[i] = value
|
|
|
|
ra.needCopyOnWrite = append(ra.needCopyOnWrite, false)
|
|
copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:])
|
|
ra.needCopyOnWrite[i] = false
|
|
}
|
|
|
|
func (ra *roaringArray) remove(key uint16) bool {
|
|
i := ra.binarySearch(0, int64(len(ra.keys)), key)
|
|
if i >= 0 { // if a new key
|
|
ra.removeAtIndex(i)
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (ra *roaringArray) removeAtIndex(i int) {
|
|
copy(ra.keys[i:], ra.keys[i+1:])
|
|
copy(ra.containers[i:], ra.containers[i+1:])
|
|
|
|
copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:])
|
|
|
|
ra.resize(len(ra.keys) - 1)
|
|
}
|
|
|
|
func (ra *roaringArray) setContainerAtIndex(i int, c container) {
|
|
ra.containers[i] = c
|
|
}
|
|
|
|
func (ra *roaringArray) replaceKeyAndContainerAtIndex(i int, key uint16, c container, mustCopyOnWrite bool) {
|
|
ra.keys[i] = key
|
|
ra.containers[i] = c
|
|
ra.needCopyOnWrite[i] = mustCopyOnWrite
|
|
}
|
|
|
|
func (ra *roaringArray) size() int {
|
|
return len(ra.keys)
|
|
}
|
|
|
|
func (ra *roaringArray) binarySearch(begin, end int64, ikey uint16) int {
|
|
low := begin
|
|
high := end - 1
|
|
for low+16 <= high {
|
|
middleIndex := low + (high-low)/2 // avoid overflow
|
|
middleValue := ra.keys[middleIndex]
|
|
|
|
if middleValue < ikey {
|
|
low = middleIndex + 1
|
|
} else if middleValue > ikey {
|
|
high = middleIndex - 1
|
|
} else {
|
|
return int(middleIndex)
|
|
}
|
|
}
|
|
for ; low <= high; low++ {
|
|
val := ra.keys[low]
|
|
if val >= ikey {
|
|
if val == ikey {
|
|
return int(low)
|
|
}
|
|
break
|
|
}
|
|
}
|
|
return -int(low + 1)
|
|
}
|
|
|
|
func (ra *roaringArray) equals(o interface{}) bool {
|
|
srb, ok := o.(roaringArray)
|
|
if ok {
|
|
|
|
if srb.size() != ra.size() {
|
|
return false
|
|
}
|
|
for i, k := range ra.keys {
|
|
if k != srb.keys[i] {
|
|
return false
|
|
}
|
|
}
|
|
|
|
for i, c := range ra.containers {
|
|
if !c.equals(srb.containers[i]) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (ra *roaringArray) headerSize() uint64 {
|
|
size := uint64(len(ra.keys))
|
|
if ra.hasRunCompression() {
|
|
if size < noOffsetThreshold { // for small bitmaps, we omit the offsets
|
|
return 4 + (size+7)/8 + 4*size
|
|
}
|
|
return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie
|
|
}
|
|
return 4 + 4 + 8*size
|
|
|
|
}
|
|
|
|
// should be dirt cheap
|
|
func (ra *roaringArray) serializedSizeInBytes() uint64 {
|
|
answer := ra.headerSize()
|
|
for _, c := range ra.containers {
|
|
answer += uint64(c.serializedSizeInBytes())
|
|
}
|
|
return answer
|
|
}
|
|
|
|
//
|
|
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
|
|
//
|
|
func (ra *roaringArray) toBytes() ([]byte, error) {
|
|
stream := &bytes.Buffer{}
|
|
hasRun := ra.hasRunCompression()
|
|
isRunSizeInBytes := 0
|
|
cookieSize := 8
|
|
if hasRun {
|
|
cookieSize = 4
|
|
isRunSizeInBytes = (len(ra.keys) + 7) / 8
|
|
}
|
|
descriptiveHeaderSize := 4 * len(ra.keys)
|
|
preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize
|
|
|
|
buf := make([]byte, preambleSize+4*len(ra.keys))
|
|
|
|
nw := 0
|
|
|
|
if hasRun {
|
|
binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie))
|
|
nw += 2
|
|
binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
|
|
nw += 2
|
|
|
|
// compute isRun bitmap
|
|
var ir []byte
|
|
|
|
isRun := newBitmapContainer()
|
|
for i, c := range ra.containers {
|
|
switch c.(type) {
|
|
case *runContainer16:
|
|
isRun.iadd(uint16(i))
|
|
}
|
|
}
|
|
// convert to little endian
|
|
ir = isRun.asLittleEndianByteSlice()[:isRunSizeInBytes]
|
|
nw += copy(buf[nw:], ir)
|
|
} else {
|
|
binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer))
|
|
nw += 4
|
|
binary.LittleEndian.PutUint32(buf[4:], uint32(len(ra.keys)))
|
|
nw += 4
|
|
}
|
|
|
|
// descriptive header
|
|
for i, key := range ra.keys {
|
|
binary.LittleEndian.PutUint16(buf[nw:], key)
|
|
nw += 2
|
|
c := ra.containers[i]
|
|
binary.LittleEndian.PutUint16(buf[nw:], uint16(c.getCardinality()-1))
|
|
nw += 2
|
|
}
|
|
|
|
startOffset := int64(preambleSize + 4*len(ra.keys))
|
|
if !hasRun || (len(ra.keys) >= noOffsetThreshold) {
|
|
// offset header
|
|
for _, c := range ra.containers {
|
|
binary.LittleEndian.PutUint32(buf[nw:], uint32(startOffset))
|
|
nw += 4
|
|
switch rc := c.(type) {
|
|
case *runContainer16:
|
|
startOffset += 2 + int64(len(rc.iv))*4
|
|
default:
|
|
startOffset += int64(getSizeInBytesFromCardinality(c.getCardinality()))
|
|
}
|
|
}
|
|
}
|
|
|
|
_, err := stream.Write(buf[:nw])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for i, c := range ra.containers {
|
|
_ = i
|
|
_, err := c.writeTo(stream)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
return stream.Bytes(), nil
|
|
}
|
|
|
|
//
|
|
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
|
|
//
|
|
func (ra *roaringArray) writeTo(out io.Writer) (int64, error) {
|
|
by, err := ra.toBytes()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
n, err := out.Write(by)
|
|
if err == nil && n < len(by) {
|
|
err = io.ErrShortWrite
|
|
}
|
|
return int64(n), err
|
|
}
|
|
|
|
func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
|
|
pos := 0
|
|
if len(buf) < 8 {
|
|
return 0, fmt.Errorf("buffer too small, expecting at least 8 bytes, was %d", len(buf))
|
|
}
|
|
|
|
cookie := binary.LittleEndian.Uint32(buf)
|
|
pos += 4
|
|
var size uint32 // number of containers
|
|
haveRunContainers := false
|
|
var isRunBitmap []byte
|
|
|
|
// cookie header
|
|
if cookie&0x0000FFFF == serialCookie {
|
|
haveRunContainers = true
|
|
size = uint32(uint16(cookie>>16) + 1) // number of containers
|
|
|
|
// create is-run-container bitmap
|
|
isRunBitmapSize := (int(size) + 7) / 8
|
|
if pos+isRunBitmapSize > len(buf) {
|
|
return 0, fmt.Errorf("malformed bitmap, is-run bitmap overruns buffer at %d", pos+isRunBitmapSize)
|
|
}
|
|
|
|
isRunBitmap = buf[pos : pos+isRunBitmapSize]
|
|
pos += isRunBitmapSize
|
|
} else if cookie == serialCookieNoRunContainer {
|
|
size = binary.LittleEndian.Uint32(buf[pos:])
|
|
pos += 4
|
|
} else {
|
|
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
|
|
}
|
|
if size > (1 << 16) {
|
|
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
|
|
}
|
|
// descriptive header
|
|
// keycard - is {key, cardinality} tuple slice
|
|
if pos+2*2*int(size) > len(buf) {
|
|
return 0, fmt.Errorf("malfomred bitmap, key-cardinality slice overruns buffer at %d", pos+2*2*int(size))
|
|
}
|
|
keycard := byteSliceAsUint16Slice(buf[pos : pos+2*2*int(size)])
|
|
pos += 2 * 2 * int(size)
|
|
|
|
if !haveRunContainers || size >= noOffsetThreshold {
|
|
pos += 4 * int(size)
|
|
}
|
|
|
|
// Allocate slices upfront as number of containers is known
|
|
if cap(ra.containers) >= int(size) {
|
|
ra.containers = ra.containers[:size]
|
|
} else {
|
|
ra.containers = make([]container, size)
|
|
}
|
|
if cap(ra.keys) >= int(size) {
|
|
ra.keys = ra.keys[:size]
|
|
} else {
|
|
ra.keys = make([]uint16, size)
|
|
}
|
|
if cap(ra.needCopyOnWrite) >= int(size) {
|
|
ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
|
|
} else {
|
|
ra.needCopyOnWrite = make([]bool, size)
|
|
}
|
|
|
|
for i := uint32(0); i < size; i++ {
|
|
key := uint16(keycard[2*i])
|
|
card := int(keycard[2*i+1]) + 1
|
|
ra.keys[i] = key
|
|
ra.needCopyOnWrite[i] = true
|
|
|
|
if haveRunContainers && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
|
|
// run container
|
|
nr := binary.LittleEndian.Uint16(buf[pos:])
|
|
pos += 2
|
|
if pos+int(nr)*4 > len(buf) {
|
|
return 0, fmt.Errorf("malformed bitmap, a run container overruns buffer at %d:%d", pos, pos+int(nr)*4)
|
|
}
|
|
nb := runContainer16{
|
|
iv: byteSliceAsInterval16Slice(buf[pos : pos+int(nr)*4]),
|
|
card: int64(card),
|
|
}
|
|
pos += int(nr) * 4
|
|
ra.containers[i] = &nb
|
|
} else if card > arrayDefaultMaxSize {
|
|
// bitmap container
|
|
nb := bitmapContainer{
|
|
cardinality: card,
|
|
bitmap: byteSliceAsUint64Slice(buf[pos : pos+arrayDefaultMaxSize*2]),
|
|
}
|
|
pos += arrayDefaultMaxSize * 2
|
|
ra.containers[i] = &nb
|
|
} else {
|
|
// array container
|
|
nb := arrayContainer{
|
|
byteSliceAsUint16Slice(buf[pos : pos+card*2]),
|
|
}
|
|
pos += card * 2
|
|
ra.containers[i] = &nb
|
|
}
|
|
}
|
|
|
|
return int64(pos), nil
|
|
}
|
|
|
|
func (ra *roaringArray) readFrom(stream io.Reader) (int64, error) {
|
|
pos := 0
|
|
var cookie uint32
|
|
err := binary.Read(stream, binary.LittleEndian, &cookie)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
|
|
}
|
|
pos += 4
|
|
var size uint32
|
|
haveRunContainers := false
|
|
var isRun *bitmapContainer
|
|
if cookie&0x0000FFFF == serialCookie {
|
|
haveRunContainers = true
|
|
size = uint32(uint16(cookie>>16) + 1)
|
|
bytesToRead := (int(size) + 7) / 8
|
|
numwords := (bytesToRead + 7) / 8
|
|
by := make([]byte, bytesToRead, numwords*8)
|
|
nr, err := io.ReadFull(stream, by)
|
|
if err != nil {
|
|
return 8 + int64(nr), fmt.Errorf("error in readFrom: could not read the "+
|
|
"runContainer bit flags of length %v bytes: %v", bytesToRead, err)
|
|
}
|
|
pos += bytesToRead
|
|
by = by[:cap(by)]
|
|
isRun = newBitmapContainer()
|
|
for i := 0; i < numwords; i++ {
|
|
isRun.bitmap[i] = binary.LittleEndian.Uint64(by)
|
|
by = by[8:]
|
|
}
|
|
} else if cookie == serialCookieNoRunContainer {
|
|
err = binary.Read(stream, binary.LittleEndian, &size)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("error in roaringArray.readFrom: when reading size, got: %s", err)
|
|
}
|
|
pos += 4
|
|
} else {
|
|
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
|
|
}
|
|
if size > (1 << 16) {
|
|
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
|
|
}
|
|
// descriptive header
|
|
keycard := make([]uint16, 2*size, 2*size)
|
|
err = binary.Read(stream, binary.LittleEndian, keycard)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
pos += 2 * 2 * int(size)
|
|
// offset header
|
|
if !haveRunContainers || size >= noOffsetThreshold {
|
|
io.CopyN(ioutil.Discard, stream, 4*int64(size)) // we never skip ahead so this data can be ignored
|
|
pos += 4 * int(size)
|
|
}
|
|
for i := uint32(0); i < size; i++ {
|
|
key := int(keycard[2*i])
|
|
card := int(keycard[2*i+1]) + 1
|
|
if haveRunContainers && isRun.contains(uint16(i)) {
|
|
nb := newRunContainer16()
|
|
nr, err := nb.readFrom(stream)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
pos += nr
|
|
ra.appendContainer(uint16(key), nb, false)
|
|
} else if card > arrayDefaultMaxSize {
|
|
nb := newBitmapContainer()
|
|
nr, err := nb.readFrom(stream)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
nb.cardinality = card
|
|
pos += nr
|
|
ra.appendContainer(keycard[2*i], nb, false)
|
|
} else {
|
|
nb := newArrayContainerSize(card)
|
|
nr, err := nb.readFrom(stream)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
pos += nr
|
|
ra.appendContainer(keycard[2*i], nb, false)
|
|
}
|
|
}
|
|
return int64(pos), nil
|
|
}
|
|
|
|
func (ra *roaringArray) hasRunCompression() bool {
|
|
for _, c := range ra.containers {
|
|
switch c.(type) {
|
|
case *runContainer16:
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (ra *roaringArray) writeToMsgpack(stream io.Writer) error {
|
|
|
|
ra.conserz = make([]containerSerz, len(ra.containers))
|
|
for i, v := range ra.containers {
|
|
switch cn := v.(type) {
|
|
case *bitmapContainer:
|
|
bts, err := cn.MarshalMsg(nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ra.conserz[i].t = bitmapContype
|
|
ra.conserz[i].r = bts
|
|
case *arrayContainer:
|
|
bts, err := cn.MarshalMsg(nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ra.conserz[i].t = arrayContype
|
|
ra.conserz[i].r = bts
|
|
case *runContainer16:
|
|
bts, err := cn.MarshalMsg(nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ra.conserz[i].t = run16Contype
|
|
ra.conserz[i].r = bts
|
|
default:
|
|
panic(fmt.Errorf("Unrecognized container implementation: %T", cn))
|
|
}
|
|
}
|
|
w := snappy.NewWriter(stream)
|
|
err := msgp.Encode(w, ra)
|
|
ra.conserz = nil
|
|
return err
|
|
}
|
|
|
|
func (ra *roaringArray) readFromMsgpack(stream io.Reader) error {
|
|
r := snappy.NewReader(stream)
|
|
err := msgp.Decode(r, ra)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if len(ra.containers) != len(ra.keys) {
|
|
ra.containers = make([]container, len(ra.keys))
|
|
}
|
|
|
|
for i, v := range ra.conserz {
|
|
switch v.t {
|
|
case bitmapContype:
|
|
c := &bitmapContainer{}
|
|
_, err = c.UnmarshalMsg(v.r)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ra.containers[i] = c
|
|
case arrayContype:
|
|
c := &arrayContainer{}
|
|
_, err = c.UnmarshalMsg(v.r)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ra.containers[i] = c
|
|
case run16Contype:
|
|
c := &runContainer16{}
|
|
_, err = c.UnmarshalMsg(v.r)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ra.containers[i] = c
|
|
default:
|
|
return fmt.Errorf("unrecognized contype serialization code: '%v'", v.t)
|
|
}
|
|
}
|
|
ra.conserz = nil
|
|
return nil
|
|
}
|
|
|
|
func (ra *roaringArray) advanceUntil(min uint16, pos int) int {
|
|
lower := pos + 1
|
|
|
|
if lower >= len(ra.keys) || ra.keys[lower] >= min {
|
|
return lower
|
|
}
|
|
|
|
spansize := 1
|
|
|
|
for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min {
|
|
spansize *= 2
|
|
}
|
|
var upper int
|
|
if lower+spansize < len(ra.keys) {
|
|
upper = lower + spansize
|
|
} else {
|
|
upper = len(ra.keys) - 1
|
|
}
|
|
|
|
if ra.keys[upper] == min {
|
|
return upper
|
|
}
|
|
|
|
if ra.keys[upper] < min {
|
|
// means
|
|
// array
|
|
// has no
|
|
// item
|
|
// >= min
|
|
// pos = array.length;
|
|
return len(ra.keys)
|
|
}
|
|
|
|
// we know that the next-smallest span was too small
|
|
lower += (spansize >> 1)
|
|
|
|
mid := 0
|
|
for lower+1 != upper {
|
|
mid = (lower + upper) >> 1
|
|
if ra.keys[mid] == min {
|
|
return mid
|
|
} else if ra.keys[mid] < min {
|
|
lower = mid
|
|
} else {
|
|
upper = mid
|
|
}
|
|
}
|
|
return upper
|
|
}
|
|
|
|
func (ra *roaringArray) markAllAsNeedingCopyOnWrite() {
|
|
for i := range ra.needCopyOnWrite {
|
|
ra.needCopyOnWrite[i] = true
|
|
}
|
|
}
|
|
|
|
func (ra *roaringArray) needsCopyOnWrite(i int) bool {
|
|
return ra.needCopyOnWrite[i]
|
|
}
|
|
|
|
func (ra *roaringArray) setNeedsCopyOnWrite(i int) {
|
|
ra.needCopyOnWrite[i] = true
|
|
}
|