decompiler
1.0.0
|
A generic source code pretty printer. More...
#include <prettyprint.hh>
Public Member Functions | |
EmitPrettyPrint (void) | |
Construct with an initial maximum line size. | |
virtual int4 | beginDocument (void) |
Begin a whole document of output. More... | |
virtual void | endDocument (int4 id) |
End a whole document of output. More... | |
virtual int4 | beginFunction (const Funcdata *fd) |
Begin a whole declaration and body of a function. More... | |
virtual void | endFunction (int4 id) |
End a whole declaration and body of a function. More... | |
virtual int4 | beginBlock (const FlowBlock *bl) |
Begin a control-flow element. More... | |
virtual void | endBlock (int4 id) |
End a control-flow element. More... | |
virtual void | tagLine (void) |
Force a line break. More... | |
virtual void | tagLine (int4 indent) |
Force a line break and indent level. More... | |
virtual int4 | beginReturnType (const Varnode *vn) |
Begin a return type declaration. More... | |
virtual void | endReturnType (int4 id) |
End a return type declaration. More... | |
virtual int4 | beginVarDecl (const Symbol *sym) |
Begin a variable declaration. More... | |
virtual void | endVarDecl (int4 id) |
End a variable declaration. More... | |
virtual int4 | beginStatement (const PcodeOp *op) |
Begin a source code statement. More... | |
virtual void | endStatement (int4 id) |
End a source code statement. More... | |
virtual int4 | beginFuncProto (void) |
Begin a function prototype declaration. More... | |
virtual void | endFuncProto (int4 id) |
End a function prototype declaration. More... | |
virtual void | tagVariable (const char *ptr, syntax_highlight hl, const Varnode *vn, const PcodeOp *op) |
Emit a variable token. More... | |
virtual void | tagOp (const char *ptr, syntax_highlight hl, const PcodeOp *op) |
Emit an operation token. More... | |
virtual void | tagFuncName (const char *ptr, syntax_highlight hl, const Funcdata *fd, const PcodeOp *op) |
Emit a function identifier. More... | |
virtual void | tagType (const char *ptr, syntax_highlight hl, const Datatype *ct) |
Emit a data-type identifier. More... | |
virtual void | tagField (const char *ptr, syntax_highlight hl, const Datatype *ct, int4 off) |
Emit an identifier for a field within a structured data-type. More... | |
virtual void | tagComment (const char *ptr, syntax_highlight hl, const AddrSpace *spc, uintb off) |
Emit a comment string as part of the generated source code. More... | |
virtual void | tagLabel (const char *ptr, syntax_highlight hl, const AddrSpace *spc, uintb off) |
Emit a code label identifier. More... | |
virtual void | print (const char *str, syntax_highlight hl=no_color) |
Emit other (more unusual) syntax as part of source code generation. More... | |
virtual int4 | openParen (char o, int4 id=0) |
Emit an open parenthesis. More... | |
virtual void | closeParen (char c, int4 id) |
Emit a close parenthesis. More... | |
virtual int4 | openGroup (void) |
Start a group of things that are printed together. More... | |
virtual void | closeGroup (int4 id) |
End a group of things that are printed together. More... | |
virtual void | clear (void) |
Reset the emitter to its initial state. | |
virtual void | setOutputStream (ostream *t) |
Set the output stream for the emitter. | |
virtual ostream * | getOutputStream (void) const |
Get the current output stream. | |
virtual void | spaces (int4 num, int4 bump=0) |
Emit a sequence of space characters as part of source code. More... | |
virtual int4 | startIndent (void) |
Start a new indent level. More... | |
virtual void | stopIndent (int4 id) |
End an indent level. More... | |
virtual int4 | startComment (void) |
Start a comment block within the emitted source code. More... | |
virtual void | stopComment (int4 id) |
End a comment block. More... | |
virtual void | flush (void) |
Flush any remaining character data. More... | |
virtual void | setMaxLineSize (int4 val) |
Provide a maximum line size to the pretty printer. More... | |
virtual int4 | getMaxLineSize (void) const |
Get the current maximum line size. More... | |
virtual void | setCommentFill (const string &fill) |
Set the comment fill characters for when line breaks are forced. More... | |
virtual bool | emitsXml (void) const |
Determine if this is an XML markup emitter. More... | |
virtual void | resetDefaults (void) |
(Re)set the default emitting options | |
void | setXML (bool val) |
Toggle whether the low-level emitter emits XML markup or not. More... | |
Public Member Functions inherited from EmitXml | |
EmitXml (void) | |
Constructor. | |
virtual | ~EmitXml (void) |
Destructor. | |
int4 | getParenLevel (void) const |
Get the current parentheses depth. More... | |
int4 | getIndentIncrement (void) const |
Get the number of characters indented per level of nesting. More... | |
void | setIndentIncrement (int4 val) |
Set the number of characters indented per level of nesting. More... | |
Private Member Functions | |
void | expand (void) |
Expand the stream buffer. More... | |
void | checkstart (void) |
Enforce whitespace for a start token. More... | |
void | checkend (void) |
Enforce whitespace for an end token. More... | |
void | checkstring (void) |
Enforce whitespace for a content token. More... | |
void | checkbreak (void) |
Enforce whitespace for a line break. More... | |
void | overflow (void) |
Reset indenting levels to accommodate a token that won't fit. More... | |
void | print (const TokenSplit &tok) |
Output the given token to the low-level emitter. More... | |
void | advanceleft (void) |
Emit tokens that have been fully committed. More... | |
void | scan (void) |
Process a new token. More... | |
void | resetDefaultsPrettyPrint (void) |
Private Attributes | |
EmitXml * | lowlevel |
The low-level emitter. | |
vector< int4 > | indentstack |
Space available for currently active nesting levels. | |
int4 | spaceremain |
Space remaining in current line. | |
int4 | maxlinesize |
Maximum number of characters allowed in a line. | |
int4 | leftotal |
Number of characters committed from the current line | |
int4 | rightotal |
Number of characters yet to be committed from the current line | |
bool | needbreak |
true if break needed before next token | |
bool | commentmode |
true if in the middle of a comment | |
string | commentfill |
Used to fill comments if line breaks are forced. | |
circularqueue< int4 > | scanqueue |
References to current open and whitespace tokens. | |
circularqueue< TokenSplit > | tokqueue |
The full stream of tokens. | |
Additional Inherited Members | |
Public Types inherited from EmitXml | |
enum | syntax_highlight { keyword_color = 0, comment_color = 1, type_color = 2, funcname_color = 3, var_color = 4, const_color = 5, param_color = 6, global_color = 7, no_color = 8 } |
Possible types of syntax highlighting. More... | |
Protected Member Functions inherited from EmitXml | |
void | resetDefaultsInternal (void) |
Set options to default values for EmitXml. | |
Protected Attributes inherited from EmitXml | |
ostream * | s |
Stream being emitted to. | |
int4 | indentlevel |
Current indent level (in fixed width characters) | |
int4 | parenlevel |
Current depth of parentheses. | |
int4 | indentincrement |
Change in indentlevel per level of nesting. | |
A generic source code pretty printer.
This pretty printer is based on the standard Derek C. Oppen pretty printing algorithm. It allows configurable indenting, spacing, and line breaks that enhance the readability of the high-level language output. It makes use of the extra information inherent in the AST to make decisions about how to best print language statements. It attempts to abstract the main formatting elements of imperative languages: statements, code blocks, declarations, etc., and so should be largely language independent. In this way, the main language emitter doesn't have to worry about formatting issues.
This emitter encapsulates a lower-level emitter that does the final emitting to stream and may add XML markup.
|
private |
Emit tokens that have been fully committed.
Groups of tokens that have been fully committed are sent to the low-level emitter and purged from the queue. Delimiter tokens that open a new printing group initially have a negative size, indicating the group is uncommitted and may need additional line breaks inserted. As the ending delimiters are scanned and/or line breaks are forced, the negative sizes are converted to positive and the corresponding group becomes committed, and the constituent content is emitted by this method.
References EmitXml::print(), TokenSplit::tokenbreak, and TokenSplit::tokenstring.
|
virtual |
Begin a control-flow element.
Inform the emitter that a new control-flow section is starting. This is a source code unit usually surrounded with curly braces '{' and '}'.
bl | is the block structure object associated with the section |
Reimplemented from EmitXml.
References TokenSplit::beginBlock().
|
virtual |
Begin a whole document of output.
Inform the emitter that generation of the source code document has begun.
Reimplemented from EmitXml.
References TokenSplit::beginDocument().
|
virtual |
Begin a function prototype declaration.
Inform the emitter that a function prototype is starting.
Reimplemented from EmitXml.
References TokenSplit::beginFuncProto().
|
virtual |
Begin a whole declaration and body of a function.
Inform the emitter that generation of a function body has begun.
Reimplemented from EmitXml.
References TokenSplit::beginFunction().
|
virtual |
Begin a return type declaration.
Inform the emitter that generation of a function's return type is starting.
vn | (if non-null) is the storage location for the return value |
Reimplemented from EmitXml.
References TokenSplit::beginReturnType().
|
virtual |
Begin a source code statement.
Inform the emitter that a source code statement is beginning.
op | is the root p-code operation of the statement |
Reimplemented from EmitXml.
References TokenSplit::beginStatement().
|
virtual |
Begin a variable declaration.
Inform the emitter that a variable declaration has started.
sym | is the symbol being declared |
Reimplemented from EmitXml.
References TokenSplit::beginVarDecl().
|
private |
Enforce whitespace for a line break.
Make sure there is some content either in the current print group or following the last line break, inserting an empty string token if necessary, before emitting a line break token.
References EmitXml::no_color, and TokenSplit::print().
|
private |
Enforce whitespace for an end token.
Make sure there is some content either in the current print group or following the last line break, inserting an empty string token if necessary, before emitting an end token.
References EmitXml::no_color, and TokenSplit::print().
|
private |
Enforce whitespace for a start token.
Make sure there is whitespace after the last content token, inserting a zero-sized whitespace token if necessary, before emitting a start token.
References TokenSplit::spaces().
|
private |
Enforce whitespace for a content token.
Make sure there is whitespace after the last content token, inserting a zero-sized whitespace token if necessary, before emitting a content token.
References TokenSplit::spaces().
|
virtual |
End a group of things that are printed together.
Inform the emitter that a printing group is ending.
id | is the id associated with the group (as returned by openGroup) |
Reimplemented from EmitXml.
References TokenSplit::closeGroup().
|
virtual |
Emit a close parenthesis.
This method emits the parenthesis character itself and ends the printing unit that was started by the matching open parenthesis.
c | is the close parenthesis character to emit |
id | is the id associated with the matching open parenthesis (as returned by openParen) |
Reimplemented from EmitXml.
References EmitXml::closeGroup(), and TokenSplit::closeParen().
|
inlinevirtual |
Determine if this is an XML markup emitter.
Reimplemented from EmitXml.
References EmitXml::emitsXml(), and EmitXml::resetDefaults().
|
virtual |
End a control-flow element.
Inform the emitter that a control-flow section is ending.
id | is the id associated with the section (as returned by beginBlock) |
Reimplemented from EmitXml.
References TokenSplit::endBlock().
|
virtual |
End a whole document of output.
Inform the emitter that generation of the source code document is finished.
id | is the id associated with the document (as returned by beginDocument) |
Reimplemented from EmitXml.
References TokenSplit::endDocument().
|
virtual |
End a function prototype declaration.
Inform the emitter that a function prototype is ending.
id | is the id associated with the prototype (as returned by beginFuncProto) |
Reimplemented from EmitXml.
References TokenSplit::endFuncProto().
|
virtual |
End a whole declaration and body of a function.
Inform the emitter that generation of a function body has ended.
id | is the id associated with the function body (as returned by beginFunction) |
Reimplemented from EmitXml.
References TokenSplit::endFunction().
|
virtual |
End a return type declaration.
Inform the emitter that generation of a function's return type is ending.
id | is the id associated with the return type (as returned by beginReturnType) |
Reimplemented from EmitXml.
References TokenSplit::endReturnType().
|
virtual |
End a source code statement.
Inform the emitter that a source code statement is ending.
id | is the id associated with the statement (as returned by beginStatement) |
Reimplemented from EmitXml.
References TokenSplit::endStatement().
|
virtual |
End a variable declaration.
Inform the emitter that a variable declaration has ended.
id | is the id associated with the declaration (as returned by beginVarDecl) |
Reimplemented from EmitXml.
References TokenSplit::endVarDecl().
|
private |
Expand the stream buffer.
Increase the number of tokens that can be in the queue simultaneously. This is automatically called when the buffers are full. Given a fixed maximum line size for the pretty printer, the buffer should quickly reach a size that supports the biggest possible number of cached tokens. The current token queue is preserved and references into the queue are recalculated.
|
virtual |
Flush any remaining character data.
Depending on the particular emitter, tokens and syntax that have been submitted to the emitter may be held internally for a time before getting output to the final stream. This routine makes sure submitted syntax is fully output.
Reimplemented from EmitXml.
References EmitXml::print().
|
inlinevirtual |
Get the current maximum line size.
If the emitter respects a maximum line size, return that size.
Reimplemented from EmitXml.
|
virtual |
Start a group of things that are printed together.
Inform the emitter that a new printing group is starting.
Reimplemented from EmitXml.
References TokenSplit::openGroup().
|
virtual |
Emit an open parenthesis.
This method emits the parenthesis character itself and also starts a printing unit of the source code being surrounded by the parentheses.
o | is the open parenthesis character to emit |
id | is an id to associate with the parenthesis |
Reimplemented from EmitXml.
References EmitXml::openGroup(), and TokenSplit::openParen().
|
private |
Reset indenting levels to accommodate a token that won't fit.
(Permanently) adjust the current set of indent levels to guarantee a minimum amount of space and issue a line break. This disrupts currently established indenting but makes sure that at least half the line is available for the next token.
|
private |
Output the given token to the low-level emitter.
Content and markup are sent to the low-level emitter if appropriate. The indentlevel stack is adjusted as necessary depending on the token.
tok | is the given token to emit. |
References TokenSplit::begin, TokenSplit::begin_comment, TokenSplit::begin_indent, EmitXml::comment_color, TokenSplit::end, TokenSplit::end_comment, TokenSplit::end_indent, TokenSplit::getClass(), TokenSplit::getIndentBump(), TokenSplit::getNumSpaces(), TokenSplit::getSize(), TokenSplit::getTag(), TokenSplit::ignore, TokenSplit::line_t, TokenSplit::print(), TokenSplit::tokenbreak, and TokenSplit::tokenstring.
|
virtual |
Emit other (more unusual) syntax as part of source code generation.
This method is used to emit syntax not covered by the other methods, such as spaces, semi-colons, braces, and other punctuation.
str | is the character data of the syntax being emitted |
hl | indicates how the syntax should be highlighted |
Reimplemented from EmitXml.
References TokenSplit::print().
|
private |
Process a new token.
The token is assumed to be just added and at the top of the queue. This is the heart of the pretty printing algorithm. The new token is assigned a size, and the queue of open references and line breaks is updated. The amount of space currently available and the size of printing groups are updated. If the current line is going to overflow, a decision is made where in the uncommitted tokens a line break needs to be inserted and what its indent level will be. If the leftmost print group closes without needing a line break, all the content it contains is committed and is sent to the low-level emitter.
References TokenSplit::begin, TokenSplit::begin_comment, TokenSplit::begin_indent, TokenSplit::end, TokenSplit::end_comment, TokenSplit::end_indent, TokenSplit::getClass(), TokenSplit::getSize(), TokenSplit::ignore, TokenSplit::setSize(), TokenSplit::tokenbreak, and TokenSplit::tokenstring.
|
inlinevirtual |
Set the comment fill characters for when line breaks are forced.
If the pretty printer forces a line break in the middle of a comment, this string is emitted to provide proper syntax and indenting to continue the comment.
fill | is the set of fill characters |
Reimplemented from EmitXml.
|
virtual |
Provide a maximum line size to the pretty printer.
The emitter may insert line breaks to enforce this maximum.
mls | is the number of characters to set for the maximum line size |
Reimplemented from EmitXml.
References EmitXml::clear().
void EmitPrettyPrint::setXML | ( | bool | val | ) |
Toggle whether the low-level emitter emits XML markup or not.
This method toggles the low-level emitter between EmitXml and EmitNoXml depending on whether XML markup is desired.
val | is true if XML markup is desired |
References EmitXml::EmitXml(), and EmitXml::setOutputStream().
|
virtual |
Emit a sequence of space characters as part of source code.
num | is the number of space characters to emit |
bump | is the number of characters to indent if the spaces force a line break |
Reimplemented from EmitXml.
References TokenSplit::spaces().
|
virtual |
Start a comment block within the emitted source code.
Inform the emitter that a set of comment tokens/lines is starting.
Reimplemented from EmitXml.
References TokenSplit::startComment().
|
virtual |
Start a new indent level.
Inform the emitter that one level of nesting is being added.
Reimplemented from EmitXml.
References EmitXml::indentincrement, and TokenSplit::startIndent().
|
virtual |
End a comment block.
Inform the emitter that a set of comment tokens/lines is ending.
id | is the id associated with the block (as returned by startComment) |
Reimplemented from EmitXml.
References TokenSplit::stopComment().
|
virtual |
End an indent level.
Inform the emitter that the current nesting has ended, and we are returning to the previous level.
id | is the id associated with the nesting (as returned by startIndent) |
Reimplemented from EmitXml.
References TokenSplit::stopIndent().
|
virtual |
Emit a comment string as part of the generated source code.
Individual comments can be broken up and emitted using multiple calls to this method, but ultimately the comment delimiters and the body of the comment are both emitted with this method, which may provide additional markup.
ptr | is the character data for the comment |
hl | indicates how the comment should be highlighted |
spc | is the address space of the address where the comment is attached |
off | is the offset of the address where the comment is attached |
Reimplemented from EmitXml.
References TokenSplit::tagComment().
|
virtual |
Emit an identifier for a field within a structured data-type.
A string representing an individual component of a structured data-type is emitted, possibly with additional markup.
ptr | is the character data for the identifier |
hl | indicates how the identifier should be highlighted |
ct | is the data-type associated with the field |
o | is the (byte) offset of the field within its structured data-type |
Reimplemented from EmitXml.
References TokenSplit::tagField().
|
virtual |
Emit a function identifier.
An identifier string representing the symbol name of the function is emitted, possibly with additional markup.
ptr | is the character data for the identifier |
hl | indicates how the identifier should be highlighted |
fd | is the function |
op | is the CALL operation associated within the syntax tree or null for a declaration |
Reimplemented from EmitXml.
References TokenSplit::tagFuncName().
|
virtual |
Emit a code label identifier.
A string describing a control-flow destination, as appropriate for the source language, is output, possibly with additional markup.
ptr | is the character data of the label |
hl | indicates how the label should be highlighted |
spc | is the address space of the code address being labeled |
off | is the offset of the code address being labeled |
Reimplemented from EmitXml.
References TokenSplit::tagLabel().
|
virtual |
Force a line break.
Tell the emitter that a new line is desired at the current indent level.
Reimplemented from EmitXml.
References TokenSplit::tagLine().
|
virtual |
Force a line break and indent level.
Tell the emitter that a new line is desired at a specific indent level. The indent level is overridden only for the line, then it returns to its previous value.
indent | is the desired indent level for the new line |
Reimplemented from EmitXml.
References TokenSplit::tagLine().
|
virtual |
Emit an operation token.
The string representing the operation as appropriate for the source language is emitted, possibly with additional markup.
ptr | is the character data for the emitted representation |
hl | indicates how the token should be highlighted |
op | is the PcodeOp object associated with the operation within the syntax tree |
Reimplemented from EmitXml.
References TokenSplit::tagOp().
|
virtual |
Emit a data-type identifier.
A string representing the name of a data-type, as appropriate for the source language, is emitted, possibly with additional markup.
ptr | is the character data for the identifier |
hl | indicates how the identifier should be highlighted |
ct | is the data-type description object |
Reimplemented from EmitXml.
References TokenSplit::tagType().
|
virtual |
Emit a variable token.
An identifier string representing the variable is output, possibly with additional markup.
ptr | is the character data for the identifier |
hl | indicates how the identifier should be highlighted |
vn | is the Varnode representing the variable within the syntax tree |
op | is a p-code operation related to the use of the variable (may be null) |
Reimplemented from EmitXml.
References TokenSplit::tagVariable().