hadoop - Hive query throwing error "java.nio.channels.ClosedChannelException" -


i trying execute following hive query on amazon emr 5.8 hive 2.3.0

create external table if not exists base_session ( property_num string, visitid string, fullvisitorid string, adate string, sessionstarttime timestamp, totalvisits int, totalhits int, totalpageviews int, totaltimeonsite int, totalbounces int, totalnewvisits int, totalscreenviews int, totaluniquescreenviews int, totaltimeonscreen int, browser string, browserversion string, os string, osver string, screencolors string, screenresolution string, devicecategory string, geonetworkcountry string, geonetworkregion string, orgid int, usertype string, landingscreen string, exitscreen string, org_session_num int, geonetworkmetro string, devicemobilebranding string, start_business_context string, end_business_context string, totalviews int, hits_appinfo_id string, hits_appinfo_name string, hits_appinfo_version string ) row format delimited fields terminated '\t' lines terminated '\n' location '${bucket}/base/session/';   create external table if not exists bt_session ( property_num string, visitid string, fullvisitorid string, adate string, sessionstarttime int, totalvisits int, totalhits int, totalpageviews int, totaltimeonsite int, totalbounces int, totalnewvisits int, totalscreenviews int, totaluniquescreenviews int, totaltimeonscreen int, browser string, browserversion string, os string, osver string, screencolors string, screenresolution string, devicecategory string, geonetworkcountry string, geonetworkregion string, geonetworkmetro string, devicemobilebranding string, totalviews int, hits_appinfo_id string, hits_appinfo_name string, hits_appinfo_version string ) row format delimited fields terminated '\t' lines terminated '\n' location '${bucket}/temp/bt/session/';       create table temp_session_orgid select     sorgid.property_num, sorgid.visitid, sorgid.fullvisitorid, sorgid.adate, sorgid.hits_customvariables_customvarvalue orgid     (         select             *,             row_number() on (partition property_num, visitid, fullvisitorid, adate order hitsid) rn         bt_hits_custom_vars         hits_customvariables_customvarname = 'orgid'     ) sorgid     sorgid.rn = 1 ;  create table temp_session_usertype select     sut.property_num, sut.visitid, sut.fullvisitorid, sut.adate, sut.hits_customvariables_customvarvalue usertype     (         select             *,             row_number() on (partition property_num, visitid, fullvisitorid, adate order hitsid) rn         bt_hits_custom_vars         hits_customvariables_customvarname = 'usertype'     ) sut     sut.rn = 1 ;     create table temp_session_hitdata select distinct sh.property_num, sh.visitid, sh.fullvisitorid, sh.adate, sh2.business_context start_business_context, sh2.appinfo_exitscreenname exitscreen, sh2.appinfo_landingscreenname landingscreen, sh3.business_context end_business_context (     select property_num, visitid, fullvisitorid, adate,     min(hitid) on (partition property_num, visitid, fullvisitorid, adate ) first_id,     max(hitid) on (partition property_num, visitid, fullvisitorid, adate ) last_id     bt_hits ) sh left join bt_hits sh2     on (sh.property_num = sh2.property_num         , sh.visitid = sh2.visitid         , sh.fullvisitorid = sh2.fullvisitorid         , sh.adate = sh2.adate         , sh.first_id = sh2.hitid) left join bt_hits sh3     on (sh.property_num = sh3.property_num         , sh.visitid = sh3.visitid         , sh.fullvisitorid = sh3.fullvisitorid         , sh.adate = sh3.adate         , sh.last_id = sh3.hitid) ;  create table bt_session2 select     s.*, o.orgid, u.usertype, sh.landingscreen, sh.exitscreen, sh.start_business_context, sh.end_business_context     bt_session s     left join temp_session_orgid o         on (o.property_num = s.property_num , o.visitid = s.visitid , o.fullvisitorid = s.fullvisitorid , o.adate = s.adate)     left join temp_session_usertype u         on (u.property_num = s.property_num , u.visitid = s.visitid , u.fullvisitorid = s.fullvisitorid , u.adate = s.adate)     left join temp_session_hitdata sh         on (sh.property_num = s.property_num , sh.visitid = s.visitid , sh.fullvisitorid = s.fullvisitorid , sh.adate = s.adate) ;  insert table base_session select     property_num,     visitid,     fullvisitorid,     adate,     from_unixtime(sessionstarttime) sessionstarttime,     totalvisits,     totalhits,     totalpageviews,     totaltimeonsite,     totalbounces,     totalnewvisits,     totalscreenviews,     totaluniquescreenviews,     totaltimeonscreen,     browser,     browserversion,     os,     osver,     screencolors,     screenresolution,     devicecategory,     geonetworkcountry,     geonetworkregion,     orgid,     usertype,     landingscreen,     exitscreen,     if( orgid not null,         row_number() on (partition property_num, visitid, adate, orgid order sessionstarttime),         null) org_session_num,     geonetworkmetro,     devicemobilebranding,     start_business_context,     end_business_context,     totalviews,     hits_appinfo_id,     hits_appinfo_name,     hits_appinfo_version     bt_session2 ; 

when executing query,i getting following exception :

    vertex failed, vertexname=reducer 2, vertexid=vertex_1504998200702_0026_1_01, diagnostics=[task failed, taskid=task_1504998200702_0026_1_01_000023, diagnostics=[taskattempt 0 failed, info=[container container_1504998200702_0026_01_000061 finished diagnostics set [container failed, exitcode=-104. container [pid=17003,containerid=container_1504998200702_0026_01_000061] running beyond physical memory limits. current usage: 1.5 gb of 1.5 gb physical memory used; 3.2 gb of 7.5 gb virtual memory used. killing container.     dump of process-tree container_1504998200702_0026_01_000061 :         |- pid ppid pgrpid sessid cmd_name user_mode_time(millis) system_time(millis) vmem_usage(bytes) rssmem_usage(pages) full_cmd_line         |- 17003 17000 17003 17003 (bash) 0 0 115810304 679 /bin/bash -c /usr/lib/jvm/java-openjdk/bin/java -server -djava.net.preferipv4stack=true -dhadoop.metrics.log.level=warn  -dlog4j.configuratorclass=org.apache.tez.common.tezlog4jconfigurator -dlog4j.configuration=tez-container-log4j.properties -dyarn.app.container.log.dir=/var/log/hadoop-yarn/containers/application_1504998200702_0026/container_1504998200702_0026_01_000061 -dtez.root.logger=info,cla  -djava.net.preferipv4stack=true -dhadoop.metrics.log.level=warn -xmx1229m -dlog4j.configuratorclass=org.apache.tez.common.tezlog4jconfigurator -dlog4j.configuration=tez-container-log4j.properties -dyarn.app.container.log.dir=/var/log/hadoop-yarn/containers/application_1504998200702_0026/container_1504998200702_0026_01_000061 -dtez.root.logger=info,cla  -djava.io.tmpdir=/mnt/yarn/usercache/hadoop/appcache/application_1504998200702_0026/container_1504998200702_0026_01_000061/tmp org.apache.tez.runtime.task.tezchild ip-10-212-196-172.johndeerecloud.com 41390 container_1504998200702_0026_01_000061 application_1504998200702_0026 1 1>/var/log/hadoop-yarn/containers/application_1504998200702_0026/container_1504998200702_0026_01_000061/stdout 2>/var/log/hadoop-yarn/containers/application_1504998200702_0026/container_1504998200702_0026_01_000061/stderr           |- 17016 17003 17003 17003 (java) 6237 246 3331600384 393016 /usr/lib/jvm/java-openjdk/bin/java -server -djava.net.preferipv4stack=true -dhadoop.metrics.log.level=warn -dlog4j.configuratorclass=org.apache.tez.common.tezlog4jconfigurator -dlog4j.configuration=tez-container-log4j.properties -dyarn.app.container.log.dir=/var/log/hadoop-yarn/containers/application_1504998200702_0026/container_1504998200702_0026_01_000061 -dtez.root.logger=info,cla -djava.net.preferipv4stack=true -dhadoop.metrics.log.level=warn -xmx1229m -dlog4j.configuratorclass=org.apache.tez.common.tezlog4jconfigurator -dlog4j.configuration=tez-container-log4j.properties -dyarn.app.container.log.dir=/var/log/hadoop-yarn/containers/application_1504998200702_0026/container_1504998200702_0026_01_000061 -dtez.root.logger=info,cla -djava.io.tmpdir=/mnt/yarn/usercache/hadoop/appcache/application_1504998200702_0026/container_1504998200702_0026_01_000061/tmp org.apache.tez.runtime.task.tezchild ip-10-212-196-172.johndeerecloud.com 41390 container_1504998200702_0026_01_000061 application_1504998200702_0026 1       container killed on request. exit code 143     container exited non-zero exit code 143     ]], taskattempt 1 failed, info=[error: error while running task ( failure ) : attempt_1504998200702_0026_1_01_000023_1:java.lang.runtimeexception: java.lang.runtimeexception: org.apache.hadoop.hive.ql.metadata.hiveexception: hive runtime error while processing row (tag=0) {"key":{"reducesinkkey0":"89634781","reducesinkkey1":"1412865647","reducesinkkey2":"4585400128498998895","reducesinkkey3":"20141009","reducesinkkey4":"1"},"value":{"_col1":"5268"}} caused by: java.lang.runtimeexception: org.apache.hadoop.hive.ql.metadata.hiveexception: hive runtime error while processing row (tag=0) {"key":{"reducesinkkey0":"89634781","reducesinkkey1":"1498837079","reducesinkkey2":"4333289744492811332","reducesinkkey3":"20170630","reducesinkkey4":"1"},"value":{"_col1":"customer"}}     @ org.apache.hadoop.hive.ql.exec.tez.reducerecordsource.pushrecord(reducerecordsource.java:297)     @ org.apache.hadoop.hive.ql.exec.tez.reducerecordprocessor.run(reducerecordprocessor.java:317)     @ org.apache.hadoop.hive.ql.exec.tez.tezprocessor.initializeandrunprocessor(tezprocessor.java:185)     ... 14 more     caused by: org.apache.hadoop.hive.ql.metadata.hiveexception: hive runtime error while processing row (tag=0) {"key":{"reducesinkkey0":"89634781","reducesinkkey1":"1498837079","reducesinkkey2":"4333289744492811332","reducesinkkey3":"20170630","reducesinkkey4":"1"},"value":{"_col1":"customer"}}         @ org.apache.hadoop.hive.ql.exec.tez.reducerecordsource$groupiterator.next(reducerecordsource.java:365)         @ org.apache.hadoop.hive.ql.exec.tez.reducerecordsource.pushrecord(reducerecordsource.java:287)         ... 16 more caused by: org.apache.hadoop.hive.ql.metadata.hiveexception: java.nio.channels.closedchannelexception     @ org.apache.hadoop.hive.ql.exec.filesinkoperator.process(filesinkoperator.java:796)     @ org.apache.hadoop.hive.ql.exec.operator.forward(operator.java:897)     @ org.apache.hadoop.hive.ql.exec.selectoperator.process(selectoperator.java:95)     @ org.apache.hadoop.hive.ql.exec.operator.forward(operator.java:897)     @ org.apache.hadoop.hive.ql.exec.filteroperator.process(filteroperator.java:126)     @ org.apache.hadoop.hive.ql.exec.operator.forward(operator.java:897)     @ org.apache.hadoop.hive.ql.exec.ptfoperator$ptfinvocation.handleoutputrows(ptfoperator.java:337)     @ org.apache.hadoop.hive.ql.exec.ptfoperator$ptfinvocation.processrow(ptfoperator.java:325)     @ org.apache.hadoop.hive.ql.exec.ptfoperator.process(ptfoperator.java:139)     @ org.apache.hadoop.hive.ql.exec.operator.forward(operator.java:897)     @ org.apache.hadoop.hive.ql.exec.selectoperator.process(selectoperator.java:95)     @ org.apache.hadoop.hive.ql.exec.tez.reducerecordsource$groupiterator.next(reducerecordsource.java:356)     ... 17 more caused by: java.nio.channels.closedchannelexception     @ org.apache.hadoop.hdfs.dfsoutputstream.checkclosed(dfsoutputstream.java:1546)     @ org.apache.hadoop.fs.fsoutputsummer.write(fsoutputsummer.java:104)     @ org.apache.hadoop.fs.fsdataoutputstream$positioncache.write(fsdataoutputstream.java:60)     @ java.io.dataoutputstream.write(dataoutputstream.java:107)     @ org.apache.hadoop.hive.ql.io.hiveignorekeytextoutputformat$1.write(hiveignorekeytextoutputformat.java:87)     @ org.apache.hadoop.hive.ql.exec.filesinkoperator.process(filesinkoperator.java:762) 

i tried setting config hive shell: hive> set hive.vectorized.execution.enabled=false; hive> set hive.vectorized.execution.reduce.enabled=false;

but still fails. sometime query runs while sometime fails due above reason.is there other workaround resolving this?


Comments

Popular posts from this blog

Sort a complex associative array in PHP -

vb.net - How to ignore if a cell is empty nothing -

recursion - Can every recursive algorithm be improved with dynamic programming? -