Job aborted due to stage failure: Task 2049 in stage 515.0 failed 5 times, most recent failure: Lost task 2049.3 in stage 515.0 (TID 241301, n11-147-025.byted.org, executor 1078): java.lang.RuntimeException: Cannot reserve additional contiguous bytes in the vectorized reader (requested 590748783 bytes). As a workaround, you can disable the vectorized reader. For parquet file format, refer to spark.sql.parquet.enableVectorizedReader; for orc file format, refer to spark.sql.orc.enableVectorizedReader.
Details
Job aborted due to stage failure: Task 2049 in stage 515.0 failed 5 times, most recent failure: Lost task 2049.3 in stage 515.0 (TID 241301, n11-147-025.byted.org, executor 1078): java.lang.RuntimeException: Cannot reserve additional contiguous bytes in the vectorized reader (requested 590748783 bytes). As a workaround, you can disable the vectorized reader. For parquet file format, refer to spark.sql.parquet.enableVectorizedReader; for orc file format, refer to spark.sql.orc.enableVectorizedReader.
    at org.apache.spark.sql.execution.vectorized.WritableColumnVector.throwUnsupportedException(WritableColumnVector.java:106)
    at org.apache.spark.sql.execution.vectorized.WritableColumnVector.reserve(WritableColumnVector.java:92)
    at org.apache.spark.sql.execution.vectorized.WritableColumnVector.appendBytes(WritableColumnVector.java:471)
    at org.apache.spark.sql.execution.vectorized.OnHeapColumnVector.putByteArray(OnHeapColumnVector.java:497)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedPlainValuesReader.readBinary(VectorizedPlainValuesReader.java:201)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedRleValuesReader.readBinary(VectorizedRleValuesReader.java:505)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBinaryBatch(VectorizedColumnReader.java:632)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:253)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetPartialRecordReader.readBatch(VectorizedParquetPartialRecordReader.java:183)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetPartialRecordReader.readSpecificBatch(VectorizedParquetPartialRecordReader.java:158)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:320)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:195)
    at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
    at org.apache.spark.sql.execution.datasources.NormalFileScanIterator$$anonfun.apply$mcZ$sp(NormalFileScanIterator.scala:50)
    at org.apache.spark.sql.execution.datasources.NormalFileScanIterator$$anonfun.apply(NormalFileScanIterator.scala:50)
    at org.apache.spark.sql.execution.datasources.NormalFileScanIterator$$anonfun.apply(NormalFileScanIterator.scala:50)
    at org.apache.spark.sql.execution.datasources.FileScanIterator.traceReadTime(FileScanIterator.scala:129)
    at org.apache.spark.sql.execution.datasources.NormalFileScanIterator.hasNext(NormalFileScanIterator.scala:50)
    at org.apache.spark.sql.execution.datasources.NormalFileScanIterator.nextIterator(NormalFileScanIterator.scala:80)
    at org.apache.spark.sql.execution.datasources.NormalFileScanIterator.hasNext(NormalFileScanIterator.scala:51)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage0.scan_nextBatch_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage0.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$$anon.hasNext(WholeStageCodegenExec.scala:731)
    at scala.collection.Iterator$$anon.hasNext(Iterator.scala:461)
    at scala.collection.Iterator$$anon.hasNext(Iterator.scala:408)
    at scala.collection.Iterator$$anon.hasNext(Iterator.scala:408)
    at org.apache.spark.shuffle.sort.UnsafeShuffleWriter.write(UnsafeShuffleWriter.java:198)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
    at org.apache.spark.scheduler.Task.run(Task.scala:112)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:359)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.OutOfMemoryError: Java heap space
    at org.apache.spark.sql.execution.vectorized.OnHeapColumnVector.reserveInternal(OnHeapColumnVector.java:565)
    at org.apache.spark.sql.execution.vectorized.WritableColumnVector.reserve(WritableColumnVector.java:90)
    at org.apache.spark.sql.execution.vectorized.WritableColumnVector.appendBytes(WritableColumnVector.java:471)
    at org.apache.spark.sql.execution.vectorized.OnHeapColumnVector.putByteArray(OnHeapColumnVector.java:497)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedPlainValuesReader.readBinary(VectorizedPlainValuesReader.java:201)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedRleValuesReader.readBinary(VectorizedRleValuesReader.java:505)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBinaryBatch(VectorizedColumnReader.java:632)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:253)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetPartialRecordReader.readBatch(VectorizedParquetPartialRecordReader.java:183)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetPartialRecordReader.readSpecificBatch(VectorizedParquetPartialRecordReader.java:158)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:320)
    at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:195)
    at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
    at org.apache.spark.sql.execution.datasources.NormalFileScanIterator$$anonfun.apply$mcZ$sp(NormalFileScanIterator.scala:50)
    at org.apache.spark.sql.execution.datasources.NormalFileScanIterator$$anonfun.apply(NormalFileScanIterator.scala:50)
    at org.apache.spark.sql.execution.datasources.NormalFileScanIterator$$anonfun.apply(NormalFileScanIterator.scala:50)
    at org.apache.spark.sql.execution.datasources.FileScanIterator.traceReadTime(FileScanIterator.scala:129)
    at org.apache.spark.sql.execution.datasources.NormalFileScanIterator.hasNext(NormalFileScanIterator.scala:50)
    at org.apache.spark.sql.execution.datasources.NormalFileScanIterator.nextIterator(NormalFileScanIterator.scala:80)
    at org.apache.spark.sql.execution.datasources.NormalFileScanIterator.hasNext(NormalFileScanIterator.scala:51)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage0.scan_nextBatch_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage0.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$$anon.hasNext(WholeStageCodegenExec.scala:731)
    at scala.collection.Iterator$$anon.hasNext(Iterator.scala:461)
    at scala.collection.Iterator$$anon.hasNext(Iterator.scala:408)
    at scala.collection.Iterator$$anon.hasNext(Iterator.scala:408)
    at org.apache.spark.shuffle.sort.UnsafeShuffleWriter.write(UnsafeShuffleWriter.java:198)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
    at org.apache.spark.scheduler.Task.run(Task.scala:112)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:359)
Driver stacktrace:

Problem
Caused by: java.lang.OutOfMemoryError: Java heap space
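What fails here is a single contiguous on-heap reservation of 590,748,783 bytes (roughly 563 MB): the vectorized Parquet reader accumulates an entire batch of column values in one array, so a batch of very large string/binary values can demand more contiguous heap than the executor has free. Before changing anything, the relevant reader settings can be inspected in the same SQL session; a minimal sketch, assuming Spark 2.4+ where both keys exist (a SET with no value echoes the current setting):

    set spark.sql.parquet.enableVectorizedReader;   -- true by default
    set spark.sql.parquet.columnarReaderBatchSize;  -- rows per batch, default 4096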
Suggested fix (reference: Spark java.lang.OutOfMemoryError: Java heap space)
set spark.driver.cores = "4";
set spark.driver.memory = "40g";
set spark.executor.memory = "30g";
set spark.executor.cores = "2";
set spark.sql.parquet.enableVectorizedReader = false;
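One caveat with the list above: spark.driver.cores, spark.driver.memory, spark.executor.memory and spark.executor.cores only take effect when the application is launched (spark-submit arguments or session config), so set them before the job starts; of the keys listed, only spark.sql.parquet.enableVectorizedReader can be flipped inside a running SQL session. If disabling the vectorized reader costs too much scan throughput, shrinking the batch so each contiguous reservation stays small is another option; a sketch assuming Spark 2.4+, with 512 as an illustrative value rather than a recommendation from the original post:

    set spark.sql.parquet.columnarReaderBatchSize = 512;  -- default is 4096 rows per batch

Disabling the vectorized reader instead falls back to the row-by-row parquet-mr record reader, which avoids the large contiguous allocation entirely at some cost in scan performance.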