Linear Regression Example using Apache Spark

Example code

import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.sql.SparkSession

/**
 * Fits a linear regression of the `mpg` column on the `wt` and `cyl` columns
 * of a tab-separated file and prints the fitted coefficients and intercept.
 *
 * No Spark master is set in the builder, so it has to be supplied from outside
 * this program (presumably via `spark-submit` or Spark configuration).
 */
object LinearRegressionExample {

  /** Input file used when no path is passed on the command line. */
  private val DefaultDataPath = "mtcars.tsv"

  /**
   * Program entry point.
   *
   * @param args optional; `args(0)` is the path of the tab-separated input
   *             file. When absent, `mtcars.tsv` is used.
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("Linear Regression Example")
      .getOrCreate()

    // Everything after session creation runs inside try/finally so the session
    // is released even when loading or fitting throws (the reader is configured
    // with mode=FAILFAST, so bad input raises instead of being skipped).
    try {
      // Tab-separated input with a header row; column types are inferred.
      val loadOptions = Map(
        "sep" -> "\t",
        "header" -> "true",
        "mode" -> "FAILFAST",
        "inferSchema" -> "true"
      )
      val dataPath = args.headOption.getOrElse(DefaultDataPath)
      val mtcars = spark.read.options(loadOptions).csv(dataPath)

      // Combine the predictor columns into the single vector column that the
      // regression estimator is configured to read ("features").
      val assembler = new VectorAssembler()
        .setInputCols(Array("wt", "cyl"))
        .setOutputCol("features")
      val output = assembler.transform(mtcars)

      // Keep only the feature vector and the label column.
      val extractedData = output.select("features", "mpg")

      val linearRegression = new LinearRegression()
        .setFeaturesCol("features")
        .setLabelCol("mpg")
      val model = linearRegression.fit(extractedData)

      println(s"Weights: ${model.coefficients} Intercept: ${model.intercept}")
    } finally {
      spark.close()
    }
  }
}

scikit-learn이나 R과 비교하면 복잡하긴 하다...

Todo: Pipeline()을 사용한 코드로 수정할 것.