1.问题背景 开发同事反馈Vehicle服务下行信号可以发出,但是没有收到回调。给了一份关键日志如下:
1 2 3 4 5 6 7 8 9 LOGD 326 log error verbose 1 E/automotive.vehicle@2.0-impl( 270): Failed to notify client [class or subclass of android.hardware.automotive.vehicle@2.0::IVehicleCallback]@remote, err: Status(EX_TRANSACTION_FAILED): 'FAILED_TRANSACTION: ' LOGD 326 log error verbose 1 E/automotive.vehicle@2.0-impl( 270): Failed to notify client [class or subclass of android.hardware.automotive.vehicle@2.0::IVehicleCallback]@remote, err: Status(EX_TRANSACTION_FAILED): 'FAILED_TRANSACTION: ' LOGD 326 log error verbose 1 E/automotive.vehicle@2.0-impl( 270): Failed to notify client [class or subclass of android.hardware.automotive.vehicle@2.0::IVehicleCallback]@remote, err: Status(EX_TRANSACTION_FAILED): 'FAILED_TRANSACTION: ' LOGD 326 log error verbose 1 E/automotive.vehicle@2.0-impl( 270): Failed to notify client [class or subclass of android.hardware.automotive.vehicle@2.0::IVehicleCallback]@remote, err: Status(EX_TRANSACTION_FAILED): 'FAILED_TRANSACTION: ' LOGD 326 log error verbose 1 E/automotive.vehicle@2.0-impl( 270): Failed to notify client [class or subclass of android.hardware.automotive.vehicle@2.0::IVehicleCallback]@remote, err: Status(EX_TRANSACTION_FAILED): 'FAILED_TRANSACTION: ' LOGD 326 log error verbose 1 E/automotive.vehicle@2.0-impl( 270): Failed to notify client [class or subclass of android.hardware.automotive.vehicle@2.0::IVehicleCallback]@remote, err: Status(EX_TRANSACTION_FAILED): 'FAILED_TRANSACTION: ' LOGD 326 log error verbose 1 E/automotive.vehicle@2.0-impl( 270): Failed to notify client [class or subclass of android.hardware.automotive.vehicle@2.0::IVehicleCallback]@remote, err: Status(EX_TRANSACTION_FAILED): 'FAILED_TRANSACTION: '
2.问题调查过程 调查开始肯定是从报错的进程开始,遍历PID找到进程所有日志,看能不能找到部分线索。
1 2 3 4 5 6 KERN KERN 326 log info verbose 1 binder: 270 :282 transaction failed 29201 /-28 , size 1365540 -109248 line 3274 auto LOGD 326 log error verbose 1 E/automotive.vehicle@2.0-impl( 270): Failed to notify client [class or subclass of android.hardware.automotive.vehicle@2.0::IVehicleCallback]@remote, err: Status(EX_TRANSACTION_FAILED): 'FAILED_TRANSACTION: ' KERN KERN 326 log info verbose 1 binder: 270 :282 transaction failed 29201 /-28 , size 957540 -76608 line 3274 KERN KERN 326 log info verbose 1 binder: 270 :282 transaction failed 29201 /-28 , size 591540 -47328 line 3274 KERN KERN 326 log info verbose 1 binder: 270 :282 transaction failed 29201 /-28 , size 577940 -46240 line 3274 KERN KERN 326 log info verbose 1 binder: 270 :282 transaction failed 29201 /-28 , size 637140 -50976 line 3274
搜索”270“进程id可以看到vehicle进程报了binder传输异常。
然后看到KERN
, 是kernal报出来的错误。因此在kernal中去搜索错误报错地方,看是否能找到相关线索。
搜索”transaction failed“
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 err_invalid_target_handle: if (target_thread) binder_thread_dec_tmpref(target_thread); if (target_proc) binder_proc_dec_tmpref(target_proc); if (target_node) { binder_dec_node(target_node, 1 , 0 ); binder_dec_node_tmpref(target_node); } binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d transaction failed %d/%d, size %lld-%lld line %d\n" , proc->pid, thread->pid, return_error, return_error_param, (u64)tr->data_size, (u64)tr->offsets_size, return_error_line);
搜索关键词,找到了关键log,binder驱动报错了,我们细数一下日志中每个报错参数的意思。
1 2 3 4 5 6 7 8 9 10 "%d:%d transaction failed %d/%d, size %lld-%lld line %d\n" proc->pid, thread->pid, return_error, return_error_param,(u64)tr->data_size, (u64)tr->offsets_size, return_error_line); 270 :282 transaction failed 29201 /-28 , size 637140 -50976 line 3274 报错进程id:270 线程id:282 错误码:29201 (BR_FAILED_REPLY) 标准错误码:-28 (No space left on device) binder_transaction_data的data_size大小为637140 字节 binder_transaction_data的offsets_size数据偏移为50976 报错代码行数:3274
因为GOS的linux系统版本是:Linux version 4.14.156;6125项目的kernal代码版本是Linux version 4.14.112的。因此这个报错行数参考意义不大。
从上面日志已经可以大概猜测到是发送的数据量过大。当前的binder_transaction_data的大小是637140字节。
3.源码分析 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 #define BINDER_VM_SIZE ((1 * 1024 * 1024) - sysconf(_SC_PAGE_SIZE) * 2) ProcessState::ProcessState(const char *driver) : mDriverName(String8(driver)) , mDriverFD(open_driver(driver)) , mVMStart(MAP_FAILED) , mThreadCountLock(PTHREAD_MUTEX_INITIALIZER) , mThreadCountDecrement(PTHREAD_COND_INITIALIZER) , mExecutingThreadsCount(0 ) , mMaxThreads(DEFAULT_MAX_BINDER_THREADS) , mStarvationStartTimeMs(0 ) , mManagesContexts(false ) , mBinderContextCheckFunc(NULL ) , mBinderContextUserData(NULL ) , mThreadPoolStarted(false ) , mThreadPoolSeq(1 ) { if (mDriverFD >= 0 ) { mVMStart = mmap(0 , BINDER_VM_SIZE, PROT_READ, MAP_PRIVATE | MAP_NORESERVE, mDriverFD, 0 ); if (mVMStart == MAP_FAILED) { ALOGE("Using %s failed: unable to mmap transaction memory.\n" , mDriverName.c_str()); close(mDriverFD); mDriverFD = -1 ; mDriverName.clear(); } } LOG_ALWAYS_FATAL_IF(mDriverFD < 0 , "Binder driver could not be opened. Terminating." ); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 static const struct file_operations binder_fops = { .owner = THIS_MODULE, .poll = binder_poll, .unlocked_ioctl = binder_ioctl, .compat_ioctl = binder_ioctl, .mmap = binder_mmap, .open = binder_open, .flush = binder_flush, .release = binder_release, }; static int binder_mmap (struct file *filp, struct vm_area_struct *vma) { ...... if ((vma->vm_end - vma->vm_start) > SZ_4M) vma->vm_end = vma->vm_start + SZ_4M; ...... ret = binder_alloc_mmap_handler(&proc->alloc, vma); if (ret) return ret; mutex_lock(&proc->files_lock); proc->files = get_files_struct(current); mutex_unlock(&proc->files_lock); return 0 ; err_bad_arg: pr_err("%s: %d %lx-%lx %s failed %d\n" , __func__, proc->pid, vma->vm_start, vma->vm_end, failure_string, ret); return ret; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 int binder_alloc_mmap_handler (struct binder_alloc *alloc, struct vm_area_struct *vma) { int ret; const char *failure_string; struct binder_buffer *buffer ; ...... alloc->buffer = (void __user *)vma->vm_start; mutex_unlock(&binder_alloc_mmap_lock); alloc->pages = kzalloc(sizeof (alloc->pages[0 ]) * ((vma>vm_end - vma->vm_start)- / PAGE_SIZE), GFP_KERNEL); if (alloc->pages == NULL ) { ret = -ENOMEM; failure_string = "alloc page array" ; goto err_alloc_pages_failed; } alloc->buffer_size = vma->vm_end - vma->vm_start; buffer = kzalloc(sizeof (*buffer), GFP_KERNEL); if (!buffer) { ret = -ENOMEM; failure_string = "alloc buffer struct" ; goto err_alloc_buf_struct_failed; } buffer->user_data = alloc->buffer; list_add(&buffer->entry, &alloc->buffers); buffer->free = 1 ; binder_insert_free_buffer(alloc, buffer); alloc->free_async_space = alloc->buffer_size / 2 ; binder_alloc_set_vma(alloc, vma); mmgrab(alloc->vma_vm_mm); return 0 ; ...... return ret; }
可以看到如果binder采用Binder oneway方式通信,数据的传输空间大小只有(1M - 8k) / 2
,即520190字节。
因此,需要模块具体调查哪个信号传输的数据超过了508kb。
版权声明: 此文章版权归Jack Ou所有,如有转载,请註明来自原作者