Version 13 (modified by bacek, 5 years ago)

Add TODO section for gc_massacre branch.

Rough idea of GC rework

  1. Decouple GC from Memory Allocation (Remove all this Pool/Arena/Whatever)
  2. Made GC track GCable. (Without going through Pools to find if we need more memory/iterating headers/etc)
    1. Implement sweep-free GC. (And it will be much easy because GC track GCable already)
  3. Break API to make Parrot_gc_mark_PMC_alive to accept PMC**. (And probably rename it into gc_visit_pmc)
  4. Implement Compacting GC.
  5. Implement Generational GC on top of Compacting.
  6. "Optional" rename VTABLE_mark to VTABLE_gc_visit 'cause I want to reuse it in Compacting phase.
    1. Or try to reuse VTABLE_visit

TODO list for gc_massacre branch

  1. Extract PMC_Allocator from gc_ms into standalone class.
    1. PoolAllocator for single size headers already extracted.
    2. Just create something like
         class PMC_Allocator {
             has @!allocators;
      
             method allocate_pmc_attributes(size_t size) {
                 return self.allocators[size % sizeof (void*)].allocate();
             }  
      }
      
  1. Implement proper string allocating/compacting. Probably by extracting gc_ms stuff into StringAllocator class.
  2. Implement TMS.get_info method.

Implementation of pseudo-code for items 1 and 2

Parrot's objects

# Current PObj.
class PObj {
    has $flags;
};

class PMC is PObj {
    method mark($gc) { ... }
};

class RPA is PMC {
    has @pmcs;

    method mark($gc) {
        $gc.mark_alive($_) for self.pmcs;
    }
};

Allocator API

# GC don't care how this work. It just have to work _fast_.
class Allocator {
    has $object_size;

    method allocate() { ... };
    method free($obj) { ... };

    method Bool is_owned($obj) { ... };
};

Simple (current) Mark&Sweep

class GC {
    has $allocator;
    has $allocated_since_last_gc;
    has $allocations_between_gc;

    has @objects;

    method BUILD() {
        self.allocator = Allocator.new(sizeof(PObj));
    };

    method get_object() {
        if self.need_gc() {
            self.do_gc();
        };

        my $obj := self.allocator.allocate();
        @objects.push($obj);

        return $obj;
    }

    method need_gc() {
        ++self.allocated_since_last_gc >= self.allocations_between_gc;
    }

    # Current GC schema.
    method do_gc() {
        # Mark phase
        self.mark_alive($_) for self.trace_roots();

        self.sweep();
    }

    method is_pmc($void) {
        self.allocator.is_owned($void);
    }

    method trace_roots() {
        my @roots;
        for $system_areas -> $ptr {
            @roots.push($ptr) is self.is_pmc($void);
        }
        @roots;
    }

    method sweep() {
        # sweep phase
        for @objects -> $obj {
            if $obj.is_alive {
                $obj.is_alive := Bool::False;
            }
            else {
                self.allocator.free($obj);
            }
        }

        self.allocated_since_last_gc := 0;
    }

    method mark_alive($obj) {
        return if $obj.is_alive;

        $obj.is_alive := Bool::True;
        $obj.mark(self);
    }
};

Incremental M&S

class IncrementalGC is GC {
    # Grey objects.
    has @working_set;
    has $do_marking;

    # Override need_gc
    method need_gc() {
        return Bool::False if self.do_marking;
        return self.SUPER.need_gc();
    }

    method get_object() {
        self.mark_a_little_bit() if self.do_marking;
        self.SUPER.get_object();
    }

    method do_gc() {
        # "Mark" phase
        self.do_marking  := Bool::True;
        self.working_set := self.trace_roots();
    }

    method mark_a_little_bit() {
        my $count := 0;
        for self.working_set -> $obj {
            return if $count++ > 10;
            self.real_mark($obj);
        }

        self.do_marking := Bool::False;
        self.sweep();
    }

    # Override GC.mark_alive
    method mark_alive($obj) {
        self.working_set.push($obj);
    }

    # Just redispatch to GC.mark_alive
    method real_mark($obj) {
        self.SUPER.mark_alive($obj);
    }
}

TriColour M&S

# is non-recursive GC
class TriColourGC is GC {
    # Black (inherited from GC)
    # We need separate @live_list if we want to do it incremental
    # has @objects;

    # Grey
    has @grey_objects;
    # White during GC
    has @dead_objects;

    method do_gc() {
        self.dead_objects  = self.objects;
        self.objects       = ();

        self.grey_objects  = self.trace_roots();
        # mark_alive will push into self.grey_objects
        self.mark_real($_) for self.grey_objects;

        # Sweep
        for self.dead_objects -> $dead {
            $dead.destroy();
            self.allocator.free($dead);
        }
    }

    method mark_alive($obj) {
        self.grey_objects.push($obj);
        self.dead_objects.remove($obj);
    }

    method mark_real($obj) {
        self.objects.push($obj);
        self.SUPER.mark_alive($obj);
    }
};

Non-recursive, tri-colour, incremental mark and sweep

This will not work because we can update objects already marked as live during incremental mark. To make it work we have to override VTABLE for marked objects to put them into @grey_objects during mark.

Also, freshly allocated objects in mark phase should be appended into @grey_objects, not @objects. Because they can have pointers to "otherwise-be-dead" objects.

class IncrementalTriColourGC is GC {
    has @live_objects;  # Black
    has @grey_objects;  # Guess?
    has @dead_objects;  # White

    has $do_marking;    # We are in mark phase.
    has $do_sweeping;   # We are in sweep phase.

    method need_gc() {
        return Bool::False if self.do_marking || self.do_sweeping;
        return self.SUPER.need_gc();
    }

    method get_object() {
        self.mark_a_little_bit() if self.do_marking;
        self.sweep_a_little_bit() if self.do_sweeping;
        self.SUPER.get_object();
    }

    method do_gc() {
        # Prepare mark phase
        self.do_marking    = Bool::True;
        self.do_sweeping   = Bool::False;
        self.dead_objects  = self.objects;
        self.objects       = ();

        self.grey_objects  = self.trace_roots();
    }

    method mark_a_little_bit() {
        my $count = 0;
        while(my $obj = self.grey_objects.pop) {
            self.mark_real($obj);
            return if $count++ > 10;
        }

        # Switch to incremental sweep.
        self.do_marking  = Bool::False;
        self.do_sweeping = Bool::True;
    }

    method sweep_a_little_bit() {
        my $count = 0;
        while(my $dead = self.dead_objects.pop) {
            $dead.destroy();
            self.allocator.free($dead);
            return if $count++ > 10;
        }

        # self.objects can be updated at this point of time.
        # Add live_objects to objects to use on next GC round.
        self.objects.push(self.live_objects);

        # .grey_objects .dead_objects are empty now.
        # Back to normal life
        self.do_sweeping = Bool::False;
    }

    method mark_alive($obj) {
        self.dead_objects.remove($obj);
        self.grey_objects.push($obj);
    }

    method mark_real($obj) {
        self.live_objects.push($obj);
        self.SUPER.mark_alive($obj);
    }
};

# vim: ft=perl6

Implementation of PoolAllocator

class PoolAllocator {
    # 4KB page
    # We reuse storage to store index of next free object.
    # For detailed design see Alexandresku's SmallObjectAllocator
    # In the nutshell:
    # Initial content of $current_index, @objects is
    # 0 => [1, 2, 3, -1]
    # first alloc:
    # 1 => [*, 2, 3, -1]
    # second alloc:
    # 2 => [*, *, 3, -1]
    # free first object
    # 0 => [2, *, 3, -1]
    class Page {
        # fixed size header.
        my int $current_index;
        my int $object_size;
        my Page* $next_free_page;

        # Number of objects is (4096-sizeof(header))/$object_size;
        my @objects;

        method BUILD() {
            self.current_index := 0;
            # Initialize "linked list"
            for 0..+self.objects-1 -> $i {
                self.objects[$i] = $i+1;
            }
            # Terminate list
            self.objects[*-1] := -1;
        }

        method allocate() {
            # Use @objects as "linked list"
            my $res       = (void*)self.objects[self.current_index];
            self.current_index = (int)*$res;
            $res;
        }

        method free($obj) {
            # Get index of freed object
            my $index := ptr_diff($obj - @objects)/self.object_size;

            # Store current_index in it.
            self.objects[$index] = self.current_index;
            self.current_index   = $index;
        }

        method is_full() { self.current_index < 0 };
    };

    # Pool data
    # Sorted array of pages to use binary search for finding pointer.
    has @pages of Page;
    # Linked list to pages with free storage.
    has $current_page;

    has $object_size;

    method BUILD() {
        self.current_page = Page->new(self.object_size);
        self.pages.push(self.current_page);
    }

    method allocate() {
        # Invariant: current page always have space.
        my $res = self.current_page.allocate();

        # Allocate new page if it's filled
        if self.current_page.is_full {
            # Try to reuse old page
            my $page = self.current_page.next_free_page;
            if $page {
                self.current_page.next_free_page := NULL;
                self.current_page = $page;
            }
            else {
                # We have to allocate new one.
                $page = Page->new(self.object_size);
                self.current_page = $page;

                # Use binary search to put $page into proper position.
                my $pos = lower_bound(self.pages, $page);
                self.pages.splice($pos+1, 0, 0, $page);
            }
        }
    }

    method free($obj) {
        my $page = self.find_page($obj);
        assert($page);
        # If page was full than put it on free list.
        my $put_in_free_list := !$page.is_full;

        $page.free($obj);

        if $put_in_free_list {
            $page.next_free_page = self.current_page;
            self.current_page    = $page;
        }
    }

    # Find page which holds $obj.
    method find_page($obj) {
        my $pos := lower_bound(self.pages, $obj);
        if ptr_diff(self.pages[$pos], $obj) < 4096 {
            return self.pages[$pos];
        }
        return NULL;
    }

    # Pool owns $obj if there is Page for it.
    method bool is_owned($obj) {
        self.find_page($obj) != NULL;
    }
};


Rename and extend current "PMC_Allocator"

Current PMC_Allocator is pretty fast, well tested and (almost) suitable to be described Allocator. We just have to implement Allocator.is_owned method. Design of PMC_Allocator isn't "tuned" for such usecase, but it should be fine because .is_owned called only during tracing system stack.

Roughly implmentation of .is_owned will be this:

PMC_Allocator.is_owned($obj) {
    my $arena = self.top_arena;
    while $arena {
       return Bool::True if ptr_diff($arena, $obj) < GC_FIXED_SIZE_POOL_SIZE;
       $arena = $arena.prev;
    }
    return Bool::False;
}