Skip to content

Commit 44394c0

Browse files
authored
Improved nested jars parsing (#130)
* Improved nested jars parsing Signed-off-by: Prabhu Subramanian <[email protected]> * Bug fix Signed-off-by: Prabhu Subramanian <[email protected]> --------- Signed-off-by: Prabhu Subramanian <[email protected]>
1 parent ce7fc98 commit 44394c0

File tree

6 files changed

+148
-55
lines changed

6 files changed

+148
-55
lines changed

build.sbt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ ThisBuild / organization := "io.appthreat"
33
ThisBuild / version := "2.5.0"
44
ThisBuild / scalaVersion := "3.6.2"
55

6-
val cpgVersion = "1.0.1"
6+
val cpgVersion = "1.1.1"
77

88
lazy val platform = Projects.platform
99
lazy val console = Projects.console

platform/frontends/jimple2cpg/src/main/scala/io/appthreat/jimple2cpg/Jimple2Cpg.scala

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
11
package io.appthreat.jimple2cpg
22

33
import better.files.File
4-
import io.appthreat.jimple2cpg.passes.{
5-
AstCreationPass,
6-
ConfigFileCreationPass,
7-
SootAstCreationPass
8-
// ReflectionTypeInferencePass
9-
}
4+
import io.appthreat.jimple2cpg.passes.{AstCreationPass, ConfigFileCreationPass, SootAstCreationPass}
105
import io.appthreat.jimple2cpg.util.ProgramHandlingUtil.{ClassFile, extractClassesInPackageLayout}
116
import io.appthreat.x2cpg.X2Cpg.withNewEmptyCpg
127
import io.appthreat.x2cpg.X2CpgFrontend
@@ -59,14 +54,15 @@ class Jimple2Cpg extends X2CpgFrontend[Config]:
5954
onlyClasses: Boolean
6055
): List[ClassFile] =
6156
val archiveFileExtensions = Set(".jar", ".war", ".zip", ".apkm", ".xapk")
62-
extractClassesInPackageLayout(
57+
val (result, rootArchivePath) = extractClassesInPackageLayout(
6358
src,
6459
tmpDir,
6560
isClass = e => e.extension.contains(".class"),
6661
isArchive = e => e.extension.exists(archiveFileExtensions.contains),
6762
recurse,
6863
onlyClasses
6964
)
65+
result
7066

7167
/** Extract all class files found, place them in their package layout and load them into soot.
7268
*
@@ -134,7 +130,6 @@ class Jimple2Cpg extends X2CpgFrontend[Config]:
134130
.withRegisteredTypes(global.usedTypes.keys().asScala.toList, cpg)
135131
.createAndApply()
136132
new ConfigFileCreationPass(cpg).createAndApply()
137-
// new ReflectionTypeInferencePass(cpg).createAndApply()
138133
end cpgApplyPasses
139134

140135
override def createCpg(config: Config): Try[Cpg] =

platform/frontends/jimple2cpg/src/main/scala/io/appthreat/jimple2cpg/passes/AstCreationPass.scala

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import io.appthreat.jimple2cpg.util.ProgramHandlingUtil.ClassFile
55
import io.appthreat.x2cpg.datastructures.Global
66
import io.shiftleft.codepropertygraph.Cpg
77
import io.shiftleft.passes.ConcurrentWriterCpgPass
8-
import org.slf4j.LoggerFactory
98
import soot.Scene
109

1110
/** Creates the AST layer from the given class file and stores all types in the given global
@@ -19,7 +18,6 @@ class AstCreationPass(classFiles: List[ClassFile], cpg: Cpg, config: Config)
1918
extends ConcurrentWriterCpgPass[ClassFile](cpg):
2019

2120
val global: Global = new Global()
22-
private val logger = LoggerFactory.getLogger(classOf[AstCreationPass])
2321

2422
override def generateParts(): Array[? <: AnyRef] = classFiles.toArray
2523

@@ -33,6 +31,4 @@ class AstCreationPass(classFiles: List[ClassFile], cpg: Cpg, config: Config)
3331
builder.absorb(localDiff)
3432
catch
3533
case e: Exception =>
36-
logger.warn(s"Exception on AST creation for ${classFile.file.canonicalPath}", e)
3734
Iterator()
38-
end AstCreationPass

platform/frontends/jimple2cpg/src/main/scala/io/appthreat/jimple2cpg/passes/AstCreator.scala

Lines changed: 89 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import io.appthreat.x2cpg.{Ast, AstCreatorBase, ValidationMode}
77
import io.shiftleft.codepropertygraph.generated.*
88
import io.shiftleft.codepropertygraph.generated.nodes.*
99
import org.objectweb.asm.Type
10+
import org.objectweb.asm.commons.Method
11+
import scala.util.matching.Regex
1012
import org.slf4j.LoggerFactory
1113
import overflowdb.BatchedUpdate.DiffGraphBuilder
1214
import soot.jimple.*
@@ -444,11 +446,9 @@ class AstCreator(filename: String, cls: SootClass, global: Global)(implicit
444446
case _: IdentityStmt => Seq() // Identity statements redefine parameters as locals
445447
case _: NopStmt => Seq() // Ignore NOP statements
446448
case x =>
447-
logger.warn(s"Unhandled soot.Unit type ${x.getClass}")
448449
Seq(astForUnknownStmt(x, None, order))
449450
unitToAsts.put(statement, stmt)
450451
stmt
451-
end astsForStatement
452452

453453
private def astForBinOpExpr(binOp: BinopExpr, order: Int, parentUnit: soot.Unit): Ast =
454454
// https://javadoc.io/static/org.soot-oss/soot/4.3.0/soot/jimple/BinopExpr.html
@@ -474,9 +474,6 @@ class AstCreator(filename: String, cls: SootClass, global: Global)(implicit
474474
case _: EqExpr => Operators.equals
475475
case _: NeExpr => Operators.notEquals
476476
case _ =>
477-
logger.warn(
478-
s"Unhandled binary operator ${binOp.getSymbol} (${binOp.getClass}). This is unexpected behaviour."
479-
)
480477
"<operator>.unknown"
481478

482479
val callNode = NewCall()
@@ -504,7 +501,6 @@ class AstCreator(filename: String, cls: SootClass, global: Global)(implicit
504501
Seq(astForUnaryExpr(Operators.lengthOf, x, x.getOp, order, parentUnit))
505502
case x: NegExpr => Seq(astForUnaryExpr(Operators.minus, x, x.getOp, order, parentUnit))
506503
case x =>
507-
logger.warn(s"Unhandled soot.Expr type ${x.getClass}")
508504
Seq()
509505

510506
private def astsForValue(value: soot.Value, order: Int, parentUnit: soot.Unit): Seq[Ast] =
@@ -519,7 +515,6 @@ class AstCreator(filename: String, cls: SootClass, global: Global)(implicit
519515
case x: IdentityRef => Seq(astForIdentityRef(x, order, parentUnit))
520516
case x: ArrayRef => Seq(astForArrayRef(x, order, parentUnit))
521517
case x =>
522-
logger.warn(s"Unhandled soot.Value type ${x.getClass}")
523518
Seq()
524519

525520
private def astForArrayRef(arrRef: ArrayRef, order: Int, parentUnit: soot.Unit): Ast =
@@ -1241,8 +1236,92 @@ end AstCreator
12411236
*/
12421237
implicit class JvmStringOpts(s: String):
12431238

1244-
/** Parses the string as a ASM Java type descriptor and returns a fully qualified type. Also
1245-
* converts symbols such as <code>I</code> to <code>int</code>.
1239+
/** Parses the string as an ASM Java type descriptor (field type) or method descriptor and returns
1240+
* a human-readable representation. Converts primitive symbols (e.g., <code>I</code>) to keywords
1241+
* (e.g., <code>int</code>). Converts object descriptors (e.g., <code>Ljava/lang/String;</code>)
1242+
* to class names (e.g., <code>java.lang.String</code>). Converts array descriptors
1243+
* appropriately. Attempts to handle method descriptors gracefully.
1244+
*
12461245
* @return
1246+
* A human-readable type/method signature string, or a placeholder if parsing fails.
12471247
*/
1248-
def parseAsJavaType: String = Type.getType(s).getClassName.replaceAll("/", ".")
1248+
def parseAsJavaType: String =
1249+
if s == null || s.isEmpty then
1250+
return "void"
1251+
1252+
try
1253+
val asmType = Type.getType(s)
1254+
asmType.getSort match
1255+
case Type.VOID => "void"
1256+
case Type.BOOLEAN => "boolean"
1257+
case Type.CHAR => "char"
1258+
case Type.BYTE => "byte"
1259+
case Type.SHORT => "short"
1260+
case Type.INT => "int"
1261+
case Type.FLOAT => "float"
1262+
case Type.LONG => "long"
1263+
case Type.DOUBLE => "double"
1264+
case Type.ARRAY | Type.OBJECT =>
1265+
asmType.getClassName()
1266+
case _ =>
1267+
asmType.getClassName()
1268+
catch
1269+
case ae: java.lang.AssertionError =>
1270+
handleTypeParsingError(s, ae)
1271+
case iae: java.lang.IllegalArgumentException =>
1272+
handleTypeParsingError(s, iae)
1273+
case e: Exception =>
1274+
s
1275+
end try
1276+
end parseAsJavaType
1277+
1278+
private def handleTypeParsingError(s: String, error: Throwable): String =
1279+
error match
1280+
case _: java.lang.AssertionError =>
1281+
case _: java.lang.IllegalArgumentException =>
1282+
()
1283+
if s.startsWith("(") then
1284+
try
1285+
val method = Method.getMethod(s)
1286+
val returnTypeStr = method.getReturnType.getClassName
1287+
val argTypeStrs = method.getArgumentTypes.map(_.getClassName)
1288+
s"$returnTypeStr(${argTypeStrs.mkString(",")})"
1289+
catch
1290+
case _: Exception =>
1291+
try
1292+
val regexResult = parseMethodDescriptorWithRegex(s)
1293+
if regexResult != s then return regexResult
1294+
else
1295+
return s
1296+
catch
1297+
case _: Exception =>
1298+
s
1299+
else
1300+
s
1301+
end handleTypeParsingError
1302+
1303+
private def parseMethodDescriptorWithRegex(desc: String): String =
1304+
val methodDescRegex: Regex = """^\((.*?)\)(.+)$""".r
1305+
val typeRegex: Regex = """(\[*(?:L[^;]+;|[ZBCSIJFDV]))""".r
1306+
desc match
1307+
case methodDescRegex(paramsPart, returnPart) =>
1308+
try
1309+
val paramTypes = typeRegex.findAllIn(paramsPart).map { t =>
1310+
try
1311+
Type.getType(t).getClassName
1312+
catch
1313+
case _: Exception => t
1314+
}.toList
1315+
val returnType =
1316+
try
1317+
Type.getType(returnPart).getClassName
1318+
catch
1319+
case _: Exception => returnPart
1320+
s"$returnType(${paramTypes.mkString(",")})"
1321+
catch
1322+
case _: Exception => desc
1323+
case _ =>
1324+
desc
1325+
end match
1326+
end parseMethodDescriptorWithRegex
1327+
end JvmStringOpts

platform/frontends/jimple2cpg/src/main/scala/io/appthreat/jimple2cpg/util/ProgramHandlingUtil.scala

Lines changed: 55 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,17 @@ object ProgramHandlingUtil:
4646
*/
4747
private def unfoldArchives[A](
  src: File,
  emitOrUnpack: File => Either[A, List[File]],
  depth: Int = 0,
  maxDepth: Int = 10
): IterableOnce[A] =
  // The depth limit prevents infinite recursion: once it is exceeded this branch yields nothing
  // and `emitOrUnpack` is not invoked for `src`.
  if depth > maxDepth then Seq.empty
  else
    emitOrUnpack(src).fold[IterableOnce[A]](
      // Left: the file itself produced a value.
      emitted => Seq(emitted),
      // Right: the file was unpacked into further files, each visited one level deeper.
      unpacked =>
        unpacked.flatMap(child => unfoldArchives(child, emitOrUnpack, depth + 1, maxDepth))
    )
5560

5661
/** Find <pre>.class</pre> files, including those inside archives.
5762
*
@@ -77,7 +82,8 @@ object ProgramHandlingUtil:
7782
isArchive: Entry => Boolean,
7883
isClass: Entry => Boolean,
7984
recurse: Boolean,
80-
onlyClasses: Boolean
85+
onlyClasses: Boolean,
86+
rootArchivePath: Option[String]
8187
): IterableOnce[ClassFile] =
8288

8389
def shouldExtract(e: Entry) =
@@ -86,16 +92,24 @@ object ProgramHandlingUtil:
8692
src,
8793
{
8894
case f if isClass(Entry(f)) =>
89-
Left(ClassFile(f))
95+
Left(ClassFile(f, rootArchivePath))
9096
case f if f.isDirectory() =>
9197
var files = f.listRecursively.filterNot(_.isDirectory)
9298
if onlyClasses then
9399
files = files.filter(_.pathAsString.endsWith(".class"))
100+
val dirSourceArchivePath = if src.isDirectory then None else rootArchivePath
94101
Right(files.toList)
95102
case f if isArchive(Entry(f)) && (recurse || f == src) =>
96103
val xTmp = File.newTemporaryDirectory("extract-archive-", parent = Some(tmpDir))
97104
val unzipDirs = Try(f.unzipTo(xTmp, e => shouldExtract(Entry(e)))) match
98-
case Success(dir) => List(dir)
105+
case Success(dir) =>
106+
val nestedArchives = if recurse then
107+
dir.listRecursively.filter(file =>
108+
file.isRegularFile && isArchive(Entry(file))
109+
).toList
110+
else
111+
List.empty[File]
112+
dir :: nestedArchives
99113
case Failure(e) =>
100114
List.empty
101115
Right(unzipDirs)
@@ -106,7 +120,7 @@ object ProgramHandlingUtil:
106120
end extractClassesToTmp
107121

108122
object ClassFile:
109-
private def getPackagePathFromByteCode(is: InputStream): Option[String] =
123+
private def readPackagePathFromStream(is: InputStream): Option[String] =
110124
val cr = new ClassReader(is)
111125
sealed class ClassNameVisitor extends ClassVisitor(Opcodes.ASM9):
112126
var path: Option[String] = None
@@ -131,14 +145,20 @@ object ProgramHandlingUtil:
131145
* The package path if successfully retrieved
132146
*/
133147
private def getPackagePathFromByteCode(file: File): Option[String] =
  // Any non-fatal failure while opening or reading the file collapses to None, exactly like a
  // class file for which no package path was found.
  Try(file.fileInputStream.apply(readPackagePathFromStream)).toOption.flatten
139153
end ClassFile
140-
sealed class ClassFile(val file: File, val packagePath: Option[String]):
141-
def this(file: File) = this(file, ClassFile.getPackagePathFromByteCode(file))
154+
155+
sealed class ClassFile(
156+
val file: File,
157+
val packagePath: Option[String],
158+
val sourceArchivePath: Option[String]
159+
):
160+
def this(file: File, sourceArchivePath: Option[String]) =
161+
this(file, ClassFile.getPackagePathFromByteCode(file), sourceArchivePath)
142162

143163
private val components: Option[Array[String]] = packagePath.map(_.split("/"))
144164

@@ -159,7 +179,8 @@ object ProgramHandlingUtil:
159179
destClass.parent.createDirectories()
160180
ClassFile(
161181
file.copyTo(destClass)(File.CopyOptions(overwrite = true)),
162-
packagePath
182+
packagePath,
183+
sourceArchivePath
163184
)
164185
}
165186
.orElse {
@@ -192,19 +213,25 @@ object ProgramHandlingUtil:
192213
isArchive: Entry => Boolean,
193214
recurse: Boolean,
194215
onlyClasses: Boolean
195-
): List[ClassFile] =
196-
File
197-
.temporaryDirectory("extract-classes-")
198-
.apply(tmpDir =>
199-
extractClassesToTmp(
200-
src,
201-
tmpDir,
202-
isArchive,
203-
isClass,
204-
recurse: Boolean,
205-
onlyClasses: Boolean
206-
).iterator
207-
.flatMap(_.copyToPackageLayoutIn(destDir))
208-
.toList
209-
)
216+
): (List[ClassFile], Option[String]) =
217+
val rootArchivePath: Option[String] =
218+
if isArchive(new Entry(src)) then Some(src.pathAsString) else None
219+
File
220+
.temporaryDirectory("extract-classes-")
221+
.apply { tmpDir =>
222+
val result = extractClassesToTmp(
223+
src,
224+
tmpDir,
225+
isArchive,
226+
isClass,
227+
recurse: Boolean,
228+
onlyClasses: Boolean,
229+
rootArchivePath
230+
).iterator
231+
.flatMap(_.copyToPackageLayoutIn(destDir))
232+
.toList
233+
234+
(result, rootArchivePath)
235+
}
236+
end extractClassesInPackageLayout
210237
end ProgramHandlingUtil

platform/frontends/jimple2cpg/src/test/scala/io/appthreat/jimple2cpg/unpacking/JarUnpackingTests.scala

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,6 @@ class JarUnpackingTests extends AnyWordSpec with Matchers with BeforeAndAfterAll
5858
for ((name, cpg) <- recurseCpgs) {
5959
val List(foo) = cpg.typeDecl.fullNameExact("Foo").l
6060
foo.name shouldBe "Foo"
61-
62-
val List(bar) = cpg.typeDecl.fullNameExact("pac.Bar").l
63-
bar.name shouldBe "Bar"
64-
6561
cpg.method.filterNot(_.isExternal).fullName.toSet shouldBe Set(
6662
"Foo.<init>:void()",
6763
"Foo.add:int(int,int)",

0 commit comments

Comments
 (0)