isinstance会很好地工作:
# BUG FIX: the class is `DataFrame`, not `Dataframe` — the original import
# (`from pyspark.sql import Dataframe`) raises ImportError.
from pyspark.sql import DataFrame
from pyspark.rdd import RDD


def foo(x):
    """Return "RDD" or "DataFrame" depending on the runtime type of *x*.

    Returns None for any other type (implicit fall-through).
    """
    if isinstance(x, RDD):
        return "RDD"
    if isinstance(x, DataFrame):
        return "DataFrame"


foo(sc.parallelize([]))
## 'RDD'
foo(sc.parallelize([("foo", 1)]).toDF())
## 'DataFrame'
from functools import singledispatch

# BUG FIX: the class is `DataFrame`, not `Dataframe`; also import the types
# this snippet registers on so it is self-contained.
from pyspark.rdd import RDD
from pyspark.sql import DataFrame


@singledispatch
def bar(x):
    """Default handler: unregistered types yield None."""
    pass


@bar.register(RDD)
def _(arg):
    return "RDD"


@bar.register(DataFrame)
def _(arg):
    return "DataFrame"


bar(sc.parallelize([]))
## 'RDD'
bar(sc.parallelize([("foo", 1)]).toDF())
## 'DataFrame'
如果您不介意引入额外的依赖项，
multipledispatch也是一个有趣的选择:
from multipledispatch import dispatch

# BUG FIX: the class is `DataFrame`, not `Dataframe`; also import the types
# used in the dispatch signatures so the snippet is self-contained.
from pyspark.rdd import RDD
from pyspark.sql import DataFrame


@dispatch(RDD)
def baz(x):
    """Handle RDD inputs."""
    return "RDD"


@dispatch(DataFrame)
def baz(x):
    """Handle DataFrame inputs."""
    return "DataFrame"


baz(sc.parallelize([]))
## 'RDD'
baz(sc.parallelize([("foo", 1)]).toDF())
## 'DataFrame'
最后,最Python化的方法是简单地检查一个接口:
def foobar(x):
    """Duck-typed check: classify *x* by interface rather than exact type.

    A DataFrame exposes an ``.rdd`` attribute; a plain RDD does not.
    Returns "DataFrame" or "RDD" accordingly.

    FIX: the original snippet had comment-only branch bodies, which is not
    valid Python; return the tag strings, consistent with the other examples.
    """
    if hasattr(x, "rdd"):
        # It is a DataFrame
        return "DataFrame"
    else:
        # It (probably) is an RDD
        return "RDD"
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)