Index: parrot/tools/dev/pbc_to_exe.pir =================================================================== --- parrot/tools/dev/pbc_to_exe.pir (revision 38385) +++ parrot/tools/dev/pbc_to_exe.pir (working copy) @@ -32,9 +32,23 @@ (infile, cfile, objfile, exefile) = 'handle_args'(argv) unless infile > '' goto err_infile + # Generate the string (on non-Win32) or string table (on Win32) that + # contains all the bytecode. Also generate the get_program_code function + # that masks the difference between the two representations. .local string codestring + $P0 = '_config'() + .local string osname + osname = $P0['osname'] + # XXX This needs to test for the Microsoft compiler, not just the Win32 OS name; MinGW on Win32 does not need the slower string-table version. + unless osname == 'MSWin32' goto code_for_non_windows + code_for_windows: + codestring = 'generate_code_win32'(infile) + goto code_end + code_for_non_windows: codestring = 'generate_code'(infile) + code_end: + + open_outfile: .local pmc outfh outfh = open cfile, 'w' @@ -47,11 +61,16 @@ print outfh, codestring print outfh, <<'MAIN' + int main(int argc, char *argv[]) { PackFile *pf; Parrot_Interp interp; + const char * const program_code = get_program_code(); + if (!program_code) + return 1; + Parrot_set_config_hash(); interp = Parrot_new( NULL ); @@ -172,14 +191,52 @@ .return(infile, cfile, objfile, exefile) .end +# The PBC will be represented as a C string, so this sub builds a table +# of the C escape sequence for each byte value (0-255), for lookup by ordinal value. +.sub 'generate_encoding_table' + # Use '\%o' for speed, or '\x%02x' for readability + .const string encoding_format = '\%o' + + # The 'sprintf' op requires the arglist to be in an array, even when + # there is only one arg. 
+ .local pmc one_number + one_number = new 'FixedIntegerArray' + set one_number, 1 + + .local pmc coded_strings + coded_strings = new 'FixedStringArray' + set coded_strings, 256 + + .local int index + index = 0 + + next_index: + one_number[0] = index + $S0 = sprintf encoding_format, one_number + coded_strings[index] = $S0 + inc index + if index < 256 goto next_index + + .return (coded_strings) +.end + +# With GCC (and all other known non-Microsoft compilers), huge string constants +# are allowed. We generate a single C string to represent the entire bytecode. +# The get_program_code function simply returns a pointer to the compile-time +# string. .sub 'generate_code' .param string infile .local pmc ifh ifh = open infile, 'r' unless ifh goto err_infile + + .local pmc encoding_table + encoding_table = 'generate_encoding_table'() + .local string codestring .local int size - codestring = "const Parrot_UInt1 program_code[] = {" + codestring = "const char * const program_code_raw =\n" + codestring .= '"' size = 0 read_loop: @@ -195,14 +252,15 @@ code_loop: unless pos < pbclength goto code_done $I0 = ord pbcstring, pos - $S0 = $I0 + $S0 = encoding_table[$I0] codestring .= $S0 - codestring .= ',' inc pos inc size $I0 = size % 32 unless $I0 == 0 goto code_loop + codestring .= '"' codestring .= "\n" + codestring .= '"' goto code_loop code_done: goto read_loop @@ -210,17 +268,145 @@ read_done: close ifh - codestring .= "\n};\n\n" + codestring .= '"' + codestring .= "\n;\n\n" codestring .= "const int bytecode_size = " $S0 = size codestring .= $S0 codestring .= ";\n" + + $S0 = <<'SUBROUTINE' + const char * get_program_code(void); + const char * get_program_code(void) + { + return program_code_raw; + } +SUBROUTINE + codestring .= $S0 + .return (codestring) err_infile: die "cannot open infile" .end + +# On the most limited known version of the Microsoft C compiler, 16KB is the +# maximum size of a string. 
We generate an array of C strings to represent +# the bytecode; each string is of a fixed size smaller than 16KB. +# The get_program_code() function allocates a block large enough to contain +# the entire bytecode, then fills the block with the C strings at run-time. +.sub 'generate_code_win32' + .param string infile + .local pmc ifh + ifh = open infile, 'r' + unless ifh goto err_infile + + # Since we cannot use the last byte (the end-of-string NUL terminator), the maximum + # block size would be 16384-1. However, we will use 16384-32 (the number + # of bytes in each line) to simplify the code. + .const int line_length = 32 + .const int max_block_size = 16352 + + .local pmc encoding_table + encoding_table = 'generate_encoding_table'() + + .local string codestring + .local int size + codestring = "const char * const program_code_array[] = {\n" + size = 0 + + read_loop: + .local string pbcstring + .local int pbclength + + pbcstring = read ifh, max_block_size + pbclength = length pbcstring + unless pbclength > 0 goto read_done + + # Pad a short block with NUL bytes so the memcpy() never has to deal with a short block; the code loop below also counts the padding bytes in size. 
+ pad_to_full_block: + unless pbclength < max_block_size goto end_pad + pbcstring .= "\0" + inc pbclength + goto pad_to_full_block + end_pad: + + if size == 0 goto skip_comma_separating_strings + codestring .= ",\n" + skip_comma_separating_strings: + + + .local int pos + pos = 0 + code_loop: + unless pos < pbclength goto code_done + + $I0 = pos % line_length + unless $I0 == 0 goto skip_line_start_quote + codestring .= ' "' + skip_line_start_quote: + + + $I0 = ord pbcstring, pos + $S0 = encoding_table[$I0] + codestring .= $S0 + inc pos + inc size + + + $I0 = size % line_length + unless $I0 == 0 goto skip_line_end_quote + codestring .= '"' + codestring .= "\n" + skip_line_end_quote: + + goto code_loop + code_done: + goto read_loop + + read_done: + close ifh + + codestring .= ",\nNULL\n" + codestring .= "};\n\n" + + codestring .= "const int bytecode_size = " + $S0 = size + codestring .= $S0 + codestring .= ";\n" + + codestring .= "const int max_block_size = " + $S0 = max_block_size + codestring .= $S0 + codestring .= ";\n" + + $S0 = <<'SUBROUTINE' + const char * get_program_code(void); + const char * get_program_code(void) + { + int i; + char *p, *program_code_in_one_block; + + program_code_in_one_block = malloc( bytecode_size ); + if (!program_code_in_one_block) + return NULL; + + for ( i = 0, p = program_code_in_one_block; program_code_array[i]; i++, p += max_block_size ) + memcpy( p, program_code_array[i], max_block_size ); + + return program_code_in_one_block; + } +SUBROUTINE + codestring .= $S0 + + .return (codestring) + + err_infile: + die "cannot open infile" +.end + + # util functions .sub 'compile_file' .param string cfile