I parse preprocessor conditions using the regular expressions. First all, I match all preprocessor directives with the regex
@"# *(?<Directive>if|elif|ifdef|ifndef) .*"
I do not need else, define and endif directives since I'm trying to detect only conditionals that "declares" symbols.
All preprocessor directive lines are assumed to be correct, since their can be compiled successfully.
For each preprocessor directive matched before, I extract the Directive captured group, and depending on the group value I execute different regular expression.
In the case of #if of #elif:
@"(?<Condition>defined *\( *(?<Symbol>\w+) *\)|(?<Symbol>\w+))( *(\|\||&&) *\k<Condition>)*"
In the case of #ifdef or #ifndef:
@"(?<Symbol>\w+)"
(Note the C# syntax for @ )
Most of C preprocessor directives are parsed correctly:
#if _SYMBOL_
#if defined(_SYMBOL_)
#if defined(_SYMBOL_) && defined(_ANOTHER_) || defined(_SYMBOL_)
#ifdef _SYMBOL_
#ifndef _SYMBOL_
#elif _SYMBOL_
...
A missing feature would be the support for comparative operators. I understand that this solution doesn't support parenthesis (also nested), but it would be an acceptable simplication.
Is it possible to simplify those regular expressions? (Especially white-space management).
Finally, here is my current code.
public static List<string> DetectPreprocessorDependencies(IEnumerable<string> source)
{
SortedList<string,string> symbols = new SortedList<string,string>();
if (source == null)
throw new ArgumentNullException("source");
foreach (string sourceLine in source) {
MatchCollection matchCollection = sPpConditionalRegex.Matches(sourceLine);
if (matchCollection.Count > 0) {
foreach (Match match in matchCollection) {
MatchCollection symbolMatches = null;
Group directiveGroup = match.Groups["Directive"];
string directive = directiveGroup.Value;
string condition = sourceLine.Substring(directiveGroup.Index + directiveGroup.Length, match.Length - (directiveGroup.Index - match.Index + directiveGroup.Length));
// Remove spaces from condition
condition = condition.Trim();
if ((directive == "if") || (directive == "elif")) {
// Legal patterns:
// - #if defined(A) { {||,&&) defined(B) {||,&&} ... }
// - #if A {>,>=,<,<=,==,!=,} B
// - Any composition of all above
symbolMatches = sPpIfConditionalSymbolRegex.Matches(condition);
} else if ((directive == "ifdef") || (directive == "ifndef")) {
// Legal patterns:
// - #ifdef SYMBOL
// - #ifndef SYMBOL
symbolMatches = sPpIfdefConditionalSymbolRegex.Matches(condition);
}
if ((symbolMatches != null) && (symbolMatches.Count > 0)) {
foreach (Match symbolMatch in symbolMatches) {
Group symbolGroup = symbolMatch.Groups["Symbol"];
foreach (Capture symbolCapture in symbolGroup.Captures) {
string symbol = symbolCapture.Value;
if (symbols.ContainsKey(symbol) == false)
symbols.Add(symbol, symbol);
}
}
}
}
}
}
return (new List<string>(symbols.Keys));
}
/// <summary>
/// Regular expression used for matching preprocessor conditional directives.
/// </summary>
private static readonly Regex sPpConditionalRegex = new Regex(@"# *(?<Directive>if|elif|ifdef|ifndef) .*", RegexOptions.Multiline | RegexOptions.Compiled);
/// <summary>
/// Regular expression used for matching preprocessor conditional directives.
/// </summary>
private static readonly Regex sPpIfConditionalSymbolRegex = new Regex(@"(?<Condition>defined *\( *(?<Symbol>\w+) *\)|(?<Symbol>\w+))( *(\|\||&&) *\k<Condition>)*", RegexOptions.Multiline | RegexOptions.Compiled);
/// <summary>
/// Regular expression used for matching preprocessor conditional directives.
/// </summary>
private static readonly Regex sPpIfdefConditionalSymbolRegex = new Regex(@"(?<Symbol>\w+)", RegexOptions.Multiline | RegexOptions.Compiled);