import os
import sys
# Path for spark source folder
os.environ['SPARK_HOME'] = "/Users/dustinchen/Documents/APP/spark-1.6.1-bin-hadoop2.6"
# You might need to enter your local IP
# os.environ['SPARK_LOCAL_IP']="192.168.2.138"
# Path for pyspark and py4j
sys.path.append("/Users/dustinchen/Documents/APP/spark-1.6.1-bin-hadoop2.6/python")
sys.path.append("/Users/dustinchen/圆答Documents/APP/spark-1.6.1-bin-hadoop2.6/python/lib/py4j-0.9-src.zip")
try:
    from pyspark import SparkContext
    from pyspark import SparkConf
    print("Successfully imported Spark Modules")
except ImportError as e:
    print("Can not import Spark Modules", e)
    sys.exit(1)
sc = SparkContext('local')
words = sc.parallelize(["scala", "java", "hadoop", "spark", "akka"])
print(words.count())
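Beyond count(), the other standard RDD actions work the same way on this RDD. A quick, purely illustrative sketch:

# A couple more basic RDD operations on the same data (illustrative only)
print(words.collect())                             # all elements as a list
print(words.filter(lambda w: "a" in w).collect())  # only the words containing "a"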
Download the Spark binary package and extract it on a *nix machine, then replace the '/Users/jilu/Downloads/' part of this code with your own path. This is the code for running SparkSQL on a single machine. In this program the sqlContext is already created; everything after that point is the SparkSQL tutorial proper (a first query is sketched after the code below). This is the program as updated for version 1.3; barring surprises, all 1.x versions are used the same way.
PS: note that this is the Python API, not the Scala one.
import os
import sys
import traceback
# Path for spark source folder
os.environ['SPARK_HOME'] = "/Users/jilu/Downloads/spark-1.3.0-bin-hadoop2.4"
# Append pyspark to Python Path
sys.path.append("/Users/jilu/Downloads/spark-1.3.0-bin-hadoop2.4/python/")
sys.path.append("/Users/jilu/Downloads/spark-1.3.0-bin-hadoop2.4/python/lib/py4j-0.8.2.1-src.zip")
# try to import the needed modules
try:
    from pyspark import SparkContext
    from pyspark import SparkConf
    from pyspark.sql import SQLContext, Row
    print("Successfully imported Spark Modules")
except ImportError as e:
    print("Can not import Spark Modules {}".format(traceback.format_exc()))
    sys.exit(1)
# config spark env
conf = SparkConf().setAppName("myApp").setMaster("local")
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)
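With sqlContext in hand, a first query looks like the sketch below. The sample Row data (name/age) is made up for illustration; the calls themselves (createDataFrame, registerTempTable, sql) are the standard Spark 1.3+ Python API:

# Build a DataFrame from a few hypothetical Row objects, register it
# as a temporary table, and query it with plain SQL
rows = sc.parallelize([Row(name="Alice", age=30), Row(name="Bob", age=25)])
df = sqlContext.createDataFrame(rows)
df.registerTempTable("people")
result = sqlContext.sql("SELECT name FROM people WHERE age > 26")
print(result.collect())  # [Row(name=u'Alice')]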