This is a RegExp exercise from HackerRank. Basically, I have to write a program (I chose JavaScript) that receives the source code of a program as input (a string) and tries to identify the programming language it is written in. Only three languages are considered: C, Java, and Python.
/**
 * Prints the language detected for the given program text.
 *
 * @param {string} input - Source code of the program to classify.
 */
function processData(input) {
    const detectedLanguage = identifyLanguage(input);
    console.log(detectedLanguage);
}
/**
 * Classifies a program as 'Java', 'C' or 'Python'.
 *
 * The checks run in a fixed order: Java first, then C, then Python.
 * When none of them matches, the answer defaults to 'C'.
 *
 * @param {string} code - Source code to classify.
 * @returns {string} One of 'Java', 'C' or 'Python'.
 */
var identifyLanguage = function (code) {
    if (isJava(code)) {
        return 'Java';
    }
    // A positive C match and the "nothing matched" fallback both yield 'C',
    // so 'Python' is reported only when the C check fails and the Python check succeeds.
    if (!isC(code) && isPython(code)) {
        return 'Python';
    }
    return 'C';
};
/**
 * Tells whether the text contains at least one C-looking construct.
 *
 * @param {string} input - Source code to inspect.
 * @returns {boolean} true as soon as one of the C patterns matches.
 */
var isC = function (input) {
    // The literals are re-created on every call, so the `g` flag's lastIndex
    // state never carries over from one invocation to the next.
    const cSignatures = [
        // Preprocessor directive: "#include ...>" or "#define ...".
        /#(include.*?>|define.*)/g,
        // "typedef ...;" or "struct ..." followed by an opening brace.
        /\b(typedef.*?;|struct.*?(?=\s*?\{))/g,
        // Member access through a pointer: "a->b".
        /\b\w+\s*->\s*\w+\b/g,
        // Function definition with a pointer among its arguments.
        /[a-zA-Z]+\s*\*?\s*([a-zA-Z_]\w*)\(.*[a-zA-Z]+\s*\*\s*([a-zA-Z_]\w*).*\)\s*\{/g,
        // Function definition returning a pointer ("type * name(arg) {").
        /[a-zA-Z]+\s+\*\s+([a-zA-Z_]\w*)\s*\([a-zA-Z]*\s*([a-zA-Z_]\w*)\)\s*\{/g,
    ];
    return cSignatures.some((signature) => signature.test(input));
};
/**
 * Tells whether the text contains at least one Python-looking construct.
 *
 * @param {string} input - Source code to inspect.
 * @returns {boolean} true as soon as one of the Python patterns matches.
 */
var isPython = function (input) {
    // Fresh literals per call: the `g` flag's lastIndex always starts at 0.
    const pythonSignatures = [
        // A block keyword followed, later on the same line, by a colon.
        /(class|def|if|while|else|for).*:/g,
        // List literal made of integers and/or quoted strings.
        /\[((\d+|("|').+\3),)*(\d+|("|').+\5)\]/g,
        // print followed by a quoted string, or print(...).
        /\bprint(\s*("|').+\2|\(.*\))/g,
        // Capitalised boolean literals.
        /True|False/g,
    ];
    return pythonSignatures.some((signature) => signature.test(input));
};
/**
 * Tells whether the text contains at least one Java-looking construct.
 *
 * @param {string} input - Source code to inspect.
 * @returns {boolean} true when an import statement or a class header is found.
 */
var isJava = function (input) {
    // "import ...;" terminated by a semicolon on the same line.
    const importStatement = /\bimport\s*.*?;/g;
    if (importStatement.test(input)) {
        return true;
    }
    // Optional public/private modifier, then "class ... {".
    const classHeader = /(public|private)?\s*class.*?\{/g;
    return classHeader.test(input);
};
I can't say I'm satisfied with this: although it passes all the test cases, it feels kind of ugly, non-DRY, and amateurish. What can I do to improve it?
Update
Here is the previous code, updated following the suggestions in the accepted answer by Joseph:
/**
 * Signature patterns for each supported language, keyed by language name.
 *
 * Keys are listed in C, Java, Python order, which is the order seen by any
 * consumer that walks Object.keys().
 *
 * The patterns deliberately carry no `g` flag. These RegExp objects are shared
 * for the lifetime of the script, and a global regex keeps a `lastIndex`
 * between test() calls, so a second test() on the same object could start
 * scanning past the match and wrongly report "no match".
 */
var langPatterns = {
    C: [
        /#(include.*?>|define.*)/, // cPreprocessor: "#include ...>" or "#define ..."
        /\b(typedef.*?;|struct.*?(?=\s*?\{))/, // cStruct: "typedef ...;" or "struct ... {"
        /\b\w+\s*->\s*\w+\b/, // cPointers: member access "a->b"
        /[a-zA-Z]+\s+\*\s+([a-zA-Z_]\w*)\s*\([a-zA-Z]*\s*([a-zA-Z_]\w*)\)\s*\{/, // cFunctionPointerReturn
        /[a-zA-Z]+\s*\*?\s*([a-zA-Z_]\w*)\(.*[a-zA-Z]+\s*\*\s*([a-zA-Z_]\w*).*\)\s*\{/, // cFunctionPointerArg
    ],
    Java: [
        /\bimport\s*.*?;/, // javaImport: "import ...;"
        /(public|private)?\s*class.*?\{/, // javaClass: optional modifier, "class ... {"
    ],
    Python: [
        /(class|def|if|while|else|for).*:/, // pythonKeywords: keyword ... colon
        /\[((\d+|("|').+\3),)*(\d+|("|').+\5)\]/, // pythonLists: list of ints / quoted strings
        /\bprint(\s*("|').+\2|\(.*\))/, // pythonPrint: print "..." or print(...)
        // /True|False/, // pythonBoolean: left disabled, as in the original table
    ],
};
/**
 * Table-driven language detection: returns the first language in
 * `langPatterns` (in key order) for which at least one pattern matches.
 *
 * When no pattern matches at all, the result is 'C', the same fallback
 * that identifyLanguage uses.
 *
 * @param {string} code - Source code to classify.
 * @returns {string} The name of the first matching language, or 'C'.
 */
var identifyLanguage2 = function (code) {
    const matchedLanguage = Object.keys(langPatterns).find(lang =>
        langPatterns[lang].some(pattern => {
            // The patterns are shared between calls. A global or sticky regex
            // resumes from its previous lastIndex, so rewind it to make every
            // scan start at the beginning of `code`.
            pattern.lastIndex = 0;
            return pattern.test(code);
        }));
    return matchedLanguage === undefined ? 'C' : matchedLanguage;
};