[cdt-patch] Patch for Scanner performance

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [List Home]
[cdt-patch] Patch for Scanner performance
From: David Daoust <ddaoust@xxxxxxxxxx>
Date: Tue, 16 Mar 2004 13:30:18 -0500
Delivered-to: cdt-patch@xxxxxxxxxxx
List-archive: <http://dev.eclipse.org/pipermail/cdt-patch/>
List-help: <mailto:cdt-patch-request@eclipse.org?subject=help>
List-subscribe: <http://dev.eclipse.org/mailman/listinfo/cdt-patch>, <mailto:cdt-patch-request@eclipse.org?subject=subscribe>
List-unsubscribe: <http://dev.eclipse.org/mailman/listinfo/cdt-patch>, <mailto:cdt-patch-request@eclipse.org?subject=unsubscribe>
Here is a patch that addresses some issues with the scanner that I found using a performance profiler.

The time for a full scan of "w32api/windows.h" dropped by about 30% with these changes.

Here are some of the numbers before and after the patch:

Before
Scan: 2.2 s
Scan + parse: 5.2 s

After
Scan: 1.5 s
Scan + parse: 4.4 s

Note that there are about 100,000 tokens read in the test!

For comparison, a scan + parse of stdio.h takes about 150ms.

- Dave
Index: parser/org/eclipse/cdt/internal/core/parser/scanner/GCCScannerExtension.java
===================================================================
retrieving revision 1.5
diff -u -r1.5 GCCScannerExtension.java
--- parser/org/eclipse/cdt/internal/core/parser/scanner/GCCScannerExtension.java	11 Mar 2004 20:47:34 -0000	1.5
+++ parser/org/eclipse/cdt/internal/core/parser/scanner/GCCScannerExtension.java	16 Mar 2004 16:54:39 -0000
@@ -83,7 +83,8 @@
 			scannerData.getLogService().traceLog( "GCCScannerExtension handling #include_next directive" ); //$NON-NLS-1$
 			// figure out the name of the current file and its path
 			IScannerContext context = scannerData.getContextStack().getCurrentContext();
-			if( context.getKind() != IScannerContext.ContextKind.INCLUSION ) 
+//	There are times where context is null		
+//			if( context.getKind() != IScannerContext.ContextKind.INCLUSION ) 
 			{
 				//handle appropriate error
 			}
Index: parser/org/eclipse/cdt/internal/core/parser/scanner/Scanner.java
===================================================================
retrieving revision 1.22
diff -u -r1.22 Scanner.java
--- parser/org/eclipse/cdt/internal/core/parser/scanner/Scanner.java	11 Mar 2004 20:47:34 -0000	1.22
+++ parser/org/eclipse/cdt/internal/core/parser/scanner/Scanner.java	16 Mar 2004 16:54:41 -0000
@@ -721,7 +721,7 @@
 		return getChar();
 	}
 	
-	int getChar() throws ScannerException
+	final int getChar() throws ScannerException
 	{
 		return getChar( false );
 	}
@@ -731,12 +731,14 @@
 		
 		lastContext = scannerData.getContextStack().getCurrentContext();
 		
-		if (scannerData.getContextStack().getCurrentContext() == null)
+		if (lastContext == null)
 			// past the end of file
 			return c;
 
-        c = accountForUndo(c);
-    	int baseOffset = lastContext.getOffset() - lastContext.undoStackSize() - 1;
+       	if (lastContext.undoStackSize() != 0 )
+    		c = lastContext.popUndo();
+    	else
+    		c = readFromStream();
 		
 		if (enableTrigraphReplacement && (!insideString || enableTrigraphReplacementInStrings)) {
 			// Trigraph processing
@@ -747,39 +749,39 @@
 					c = getChar(insideString);
 					switch (c) {
 						case '(':
-							expandDefinition("??(", "[", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+							expandDefinition("??(", "[", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 							c = getChar(insideString);
 							break;
 						case ')':
-							expandDefinition("??)", "]", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+							expandDefinition("??)", "]", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 							c = getChar(insideString);
 							break;
 						case '<':
-							expandDefinition("??<", "{", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+							expandDefinition("??<", "{", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 							c = getChar(insideString);
 							break;
 						case '>':
-							expandDefinition("??>", "}", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+							expandDefinition("??>", "}", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 							c = getChar(insideString);
 							break;
 						case '=':
-							expandDefinition("??=", "#", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+							expandDefinition("??=", "#", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 							c = getChar(insideString);
 							break;
 						case '/':
-							expandDefinition("??/", "\\", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+							expandDefinition("??/", "\\", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 							c = getChar(insideString);
 							break;
 						case '\'':
-							expandDefinition("??\'", "^", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+							expandDefinition("??\'", "^", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 							c = getChar(insideString);
 							break;
 						case '!':
-							expandDefinition("??!", "|", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+							expandDefinition("??!", "|", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 							c = getChar(insideString);
 							break;
 						case '-':
-							expandDefinition("??-", "~", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+							expandDefinition("??-", "~", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 							c = getChar(insideString);
 							break;
 						default:
@@ -805,10 +807,10 @@
 				if (c == '<') {
 					c = getChar(false);
 					if (c == '%') {
-						expandDefinition("<%", "{", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+						expandDefinition("<%", "{", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 						c = getChar(false);
 					} else if (c == ':') {
-						expandDefinition("<:", "[", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+						expandDefinition("<:", "[", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 						c = getChar(false);
 					} else {
 						// Not a digraph
@@ -818,7 +820,7 @@
 				} else if (c == ':') {
 					c = getChar(false);
 					if (c == '>') {
-						expandDefinition(":>", "]", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+						expandDefinition(":>", "]", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 						c = getChar(false);
 					} else {
 						// Not a digraph
@@ -828,10 +830,10 @@
 				} else if (c == '%') {
 					c = getChar(false);
 					if (c == '>') {
-						expandDefinition("%>", "}", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+						expandDefinition("%>", "}", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 						c = getChar(false);
 					} else if (c == ':') {
-						expandDefinition("%:", "#", baseOffset); //$NON-NLS-1$ //$NON-NLS-2$
+						expandDefinition("%:", "#", lastContext.getOffset() - lastContext.undoStackSize() - 1); //$NON-NLS-1$ //$NON-NLS-2$
 						c = getChar(false);
 					} else {
 						// Not a digraph
@@ -845,38 +847,29 @@
 		return c;
 	}
 
-    protected int accountForUndo(int c)
+    protected int readFromStream()
     {
-        boolean done;
-        do {
-        	done = true;
-        
-        	if (scannerData.getContextStack().getCurrentContext().undoStackSize() != 0 ) {
-        		c = scannerData.getContextStack().getCurrentContext().popUndo();
-        	} else {
-        		try {
-        			c = scannerData.getContextStack().getCurrentContext().read();
-        			if (c == NOCHAR) {
-        				if (scannerData.getContextStack().rollbackContext(scannerData.getClientRequestor()) == false) {
-        					c = NOCHAR;
-        					break;
-        				} else {
-        					done = false;
-        				}
-        			}
-        		} catch (IOException e) {
-        			if (scannerData.getContextStack().rollbackContext(scannerData.getClientRequestor()) == false) {
-        				c = NOCHAR;
-        			} else {
-        				done = false;
-        			}
-        		}
-        	}
-        } while (!done);
-        return c;
+    	int c;
+    	try {
+    		c = scannerData.getContextStack().getCurrentContext().read();
+    	}
+    	catch (IOException e) {
+    		c = NOCHAR;
+    	}
+    	
+    	if (c != NOCHAR)
+    		return c;
+    	
+    	if (scannerData.getContextStack().rollbackContext(scannerData.getClientRequestor()) == false)
+    		return NOCHAR;
+    	
+    	if (scannerData.getContextStack().getCurrentContext().undoStackSize() != 0 )
+    		return scannerData.getContextStack().getCurrentContext().popUndo();
+    	
+    	return readFromStream();
     }
 
-	void ungetChar(int c) throws ScannerException{
+	final void ungetChar(int c) throws ScannerException{
 		scannerData.getContextStack().getCurrentContext().pushUndo(c);
 		try
 		{
@@ -1220,9 +1213,12 @@
 		if (directive == null) {
 			if( scannerExtension.canHandlePreprocessorDirective( token ) )
 				scannerExtension.handlePreprocessorDirective( token, getRestOfPreprocessorLine() );
-			StringBuffer buffer = new StringBuffer( "#"); //$NON-NLS-1$
-			buffer.append( token );
-			handleProblem( IProblem.PREPROCESSOR_INVALID_DIRECTIVE, buffer.toString(), beginningOffset, false, true );
+			else
+			{
+				StringBuffer buffer = new StringBuffer( "#"); //$NON-NLS-1$
+				buffer.append( token );
+				handleProblem( IProblem.PREPROCESSOR_INVALID_DIRECTIVE, buffer.toString(), beginningOffset, false, true );
+			}
 			return null;
 		}
 
@@ -1432,12 +1428,12 @@
 
 		c = getChar();				
 		
+		// do the least expensive tests first!
 		while (
-				Character.isUnicodeIdentifierPart( (char)c)
-//			((c >= 'a') && (c <= 'z'))
-//			|| ((c >= 'A') && (c <= 'Z'))
-//			|| ((c >= '0') && (c <= '9'))
-//			|| (c == '_')
+			((c >= 'a') && (c <= 'z'))
+			|| ((c >= 'A') && (c <= 'Z'))
+			|| ((c >= '0') && (c <= '9'))
+			|| (c == '_') || Character.isUnicodeIdentifierPart( (char)c)
 			) {
 			buff.append((char) c);
 			c = getChar();
@@ -2545,7 +2541,37 @@
 		return macroReplacementTokens;
 	}
 	
+	protected IMacroDescriptor createObjectMacroDescriptor(String key, String value ) {
+		StringBuffer signatureBuffer  = new StringBuffer();
+		signatureBuffer.append( key );
+		signatureBuffer.append( ' ' );
+		signatureBuffer.append( value );
+
+//		List macroReplacementTokens;
+//		if (value.trim().equals( "" ))
+//			macroReplacementTokens = new ArrayList();
+//		else
+//			macroReplacementTokens = tokenizeReplacementString( NO_OFFSET_LIMIT, key, value, null ); 
+		
+		List macroReplacementTokens = new ArrayList();
+		if( !value.trim().equals( "" ) )  //$NON-NLS-1$
+		{	
+
+			Token t = new Token(
+					IToken.tIDENTIFIER, 
+					value, 
+					scannerData.getContextStack().getCurrentContext(),
+					scannerData.getContextStack().getCurrentLineNumber()
+					);
+			macroReplacementTokens.add( t );
+		}
 	
+		return new ObjectMacroDescriptor( key, 
+				signatureBuffer.toString(), 
+				macroReplacementTokens, 
+				value);
+		}
+
 	protected void poundDefine(int beginning, int beginningLine ) throws ScannerException, EndOfFileException {
 		StringBuffer potentialErrorMessage = new StringBuffer( POUND_DEFINE );
 		skipOverWhitespace();
@@ -2636,8 +2662,9 @@
 		}
 		else if ((c == '\n') || (c == '\r'))
 		{
-			checkValidMacroRedefinition(key, previousDefinition, "", beginning);				 //$NON-NLS-1$
-			addDefinition( key, "" ); //$NON-NLS-1$
+			descriptor = createObjectMacroDescriptor(key, ""); //$NON-NLS-1$
+			checkValidMacroRedefinition(key, previousDefinition, descriptor, beginning);
+			addDefinition( key, descriptor ); 
 		}
 		else if ((c == ' ') || (c == '\t') ) {
 			// this is a simple definition 
@@ -2646,8 +2673,9 @@
 			// get what we are to map the name to and add it to the definitions list
 			String value = getRestOfPreprocessorLine();
 			
-			checkValidMacroRedefinition(key, previousDefinition, value, beginning);
-			addDefinition( key, value ); 
+			descriptor = createObjectMacroDescriptor(key, value);
+			checkValidMacroRedefinition(key, previousDefinition, descriptor, beginning);
+			addDefinition( key, descriptor ); 
 		
 		} else if (c == '/') {
 			// this could be a comment	
@@ -2655,20 +2683,23 @@
 			if (c == '/') // one line comment
 				{
 				skipOverSinglelineComment();
-				checkValidMacroRedefinition(key, previousDefinition, "", beginning); //$NON-NLS-1$
-				addDefinition(key, ""); //$NON-NLS-1$
+				descriptor = createObjectMacroDescriptor(key, ""); //$NON-NLS-1$
+				checkValidMacroRedefinition(key, previousDefinition, descriptor, beginning);
+				addDefinition(key, descriptor); 
 			} else if (c == '*') // multi-line comment
 				{
 				if (skipOverMultilineComment()) {
 					// we have gone over a newline
 					// therefore, this symbol was defined to an empty string
-					checkValidMacroRedefinition(key, previousDefinition, "", beginning); //$NON-NLS-1$
-					addDefinition(key, ""); //$NON-NLS-1$
+					descriptor = createObjectMacroDescriptor(key, ""); //$NON-NLS-1$
+					checkValidMacroRedefinition(key, previousDefinition, descriptor, beginning); 
+					addDefinition(key, descriptor);
 				} else {
 					String value = getRestOfPreprocessorLine();
 					
-					checkValidMacroRedefinition(key, previousDefinition, "", beginning); //$NON-NLS-1$
-					addDefinition(key, value);
+					descriptor = createObjectMacroDescriptor(key, value);
+					checkValidMacroRedefinition(key, previousDefinition, descriptor, beginning); 
+					addDefinition(key, descriptor);
 				}
 			} else {
 				// this is not a comment 
@@ -2698,40 +2729,6 @@
             /* do nothing */
         } 
 	}
-
-	protected void checkValidMacroRedefinition(
-			String key,
-			IMacroDescriptor previousDefinition,
-			String newDefinition, int beginningOffset )
-	throws ScannerException 
-	{
-		StringBuffer buffer = new StringBuffer(key);
-		buffer.append( ' ');
-		buffer.append(newDefinition);
-		IMacroDescriptor newMacro = new ObjectMacroDescriptor( key, buffer.toString(), 
-				tokenizeReplacementString( NO_OFFSET_LIMIT, key, newDefinition, null ), newDefinition );
-		checkValidMacroRedefinition( key, previousDefinition, newMacro, beginningOffset );
-	}
-	
-	
-	protected void checkValidMacroRedefinition(
-			String key,
-			String previousDefinition,
-			String newDefinition, int beginningOffset )
-	throws ScannerException 
-	{
-		StringBuffer oldMacro = new StringBuffer( key );
-		oldMacro.append( ' ');
-		StringBuffer newMacro = new StringBuffer( oldMacro.toString() );
-		oldMacro.append( previousDefinition );
-		newMacro.append( newDefinition );
-		IMacroDescriptor prevMacroDescriptor = new ObjectMacroDescriptor( key, oldMacro.toString(), 
-				tokenizeReplacementString( NO_OFFSET_LIMIT, key, previousDefinition, null ), previousDefinition );
-		IMacroDescriptor newMacroDescriptor = new ObjectMacroDescriptor( key, newMacro.toString(), 
-				tokenizeReplacementString( NO_OFFSET_LIMIT, key, newDefinition, null ), newDefinition );
-		checkValidMacroRedefinition( key, prevMacroDescriptor, newMacroDescriptor, beginningOffset );
-	}
-	
 
 	protected void checkValidMacroRedefinition(
 		String key,
Index: parser/org/eclipse/cdt/internal/core/parser/scanner/ScannerContext.java
===================================================================
retrieving revision 1.1
diff -u -r1.1 ScannerContext.java
--- parser/org/eclipse/cdt/internal/core/parser/scanner/ScannerContext.java	22 Jan 2004 20:15:26 -0000	1.1
+++ parser/org/eclipse/cdt/internal/core/parser/scanner/ScannerContext.java	16 Mar 2004 16:54:42 -0000
@@ -12,7 +12,6 @@
 
 import java.io.IOException;
 import java.io.Reader;
-import java.util.Stack;
 
 import org.eclipse.cdt.core.parser.ast.IASTInclusion;
 
@@ -24,7 +23,6 @@
     private int macroLength = -1;
 	private int line = 1;
 	private int offset;
-	private Stack undo = new Stack(); 
 	private ContextKind kind; 
 				
     /* (non-Javadoc)
@@ -108,18 +106,15 @@
 		return line;
 	}
 
-	/**
-	 * Returns the reader.
-	 * @return Reader
+	/*	 there are never more than 2 elements in the unget stack! 
+	 *   trigraphs may involve 2, but in general there is a single element
+	 *	 I have made room for 10 -- just in case :-) 
 	 */
-	public final Reader getReader()
-	{
-		return reader;
-	}
-
+	private int pos = 0;
+	private int undo[] = new int[10];  
 	public final int undoStackSize()
 	{
-		return undo.size();
+		return pos;
 	}
 
 	/**
@@ -128,7 +123,7 @@
 	 */
 	public final int popUndo()
 	{
-		int c = ((Integer)undo.pop()).intValue();
+		int c = undo[--pos];
 		if ((char)c == '\n') line++;
 		return c;
 	}
@@ -137,10 +132,18 @@
 	 * Sets the undo.
 	 * @param undo The undo to set
 	 */
-	public void pushUndo(int undo)
+	public final void pushUndo(int c)
 	{
-		if ((char)undo == '\n') line--;
-		this.undo.push( new Integer( undo )); 
+		if ((char)c == '\n') line--;
+		undo[pos++] = c; 
+	}
+	/**
+	 * Returns the reader.
+	 * @return Reader
+	 */
+	public final Reader getReader()
+	{
+		return reader;
 	}
Follow-Ups:
- Applid [HEAD] Re: [cdt-patch] Patch for Scanner performance
  - From: Andrew Niefer
Prev by Date: [cdt-patch] Error Parser Tests Patch
Next by Date: [cdt-patch] Externalizing Strings in ManagedBuilder.
Previous by thread: [cdt-patch] Error Parser Tests Patch
Next by thread: Applid [HEAD] Re: [cdt-patch] Patch for Scanner performance
Index(es):
- Date
- Thread
Breadcrumbs