{
 "cells": [
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "SAM018 - Basic Spark R Example\n",
    "==============================\n",
    "\n",
    "Description\n",
    "-----------\n",
    "\n",
    "Walk through basic SparkR operations on the built-in `mtcars` dataset:\n",
    "creating a SparkDataFrame, selecting columns, filtering rows, grouping\n",
    "and aggregating (including `cube` and `rollup`), and adding a new column.\n",
    "\n",
    "### Steps\n",
    "\n",
    "Run the cells in order. The cells assume that the `spark` session object\n",
    "is already provided by the kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Simple assignment; presumably a smoke test that the R kernel responds.\n",
    "x \u003c- c(1, 2, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Evaluate `spark`, which this notebook expects the kernel to have defined.\n",
    "spark"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "The following is from https://spark.apache.org/docs/latest/sparkr.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "head(mtcars, 6)"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "### Create the SparkDataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "df \u003c- as.DataFrame(mtcars)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "head(df, 6)"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "Select column “mpg”"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "head(select(df, df$mpg), 6)"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "You can also pass in the column name as a string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "head(select(df, \"qsec\"), 6)"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "Filter the SparkDataFrame to only retain rows with weights less than\n",
    "3,250 lbs (`wt` is recorded in units of 1000 lbs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "head(filter(df, df$wt \u003c 3.25), 6)"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "### Grouping, aggregation\n",
    "\n",
    "Use the `n` operator to count the number of times each cylinder count\n",
    "appears"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "head(summarize(groupBy(df, df$cyl), count = n(df$cyl)))"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "Sort the output from the aggregation to get the most common numbers of\n",
    "gears"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "gear \u003c- summarize(groupBy(df, df$gear), count = n(df$gear))\n",
    "head(arrange(gear, desc(gear$count)))"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "OLAP cube operators: `cube`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "head(agg(cube(df, \"cyl\", \"disp\", \"gear\"), avg(df$mpg)))"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "and `rollup`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "head(agg(rollup(df, \"cyl\", \"disp\", \"gear\"), avg(df$mpg)))"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "Assign a new value to a new column in the same SparkDataFrame (`kpl`,\n",
    "kilometres per litre, derived from `mpg`)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# 1 mile per US gallon is approximately 0.425 km per litre.\n",
    "df$kpl \u003c- df$mpg * 0.425\n",
    "head(df, 6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "print(\"Notebook execution is complete.\")"
   ]
  }
 ],
 "nbformat": 4,
 "nbformat_minor": 5,
 "metadata": {
  "kernelspec": {
   "name": "sparkrkernel",
   "display_name": "Spark \\| R"
  },
  "pansop": {
   "related": "",
   "test": {
    "strategy": "",
    "types": null,
    "disable": {
     "reason": "",
     "workitems": null,
     "types": null
    }
   },
   "target": {
    "current": "",
    "final": ""
   },
   "internal": {
    "parameters": null,
    "symlink": false
   },
   "timeout": "0"
  },
  "language_info": {
   "codemirror_mode": "{ Name: \"\", Version: \"\"}",
   "file_extension": "",
   "mimetype": "",
   "name": "",
   "nbconvert_exporter": "",
   "pygments_lexer": "",
   "version": ""
  },
  "widgets": []
 }
}