关于python调用百度api接口识别身份证信息

毕设工厂 2024年04月16日 10:03 论文问答 1

问题遇到的现象和发生背景

在调用百度云api接口（身份证识别）时，已经可以正常读取到身份证信息，但是始终无法找到正确的办法从字符串中提取姓名、性别、地址、身份证号码等字段内容。

源代码如下所示（APP_ID、API_KEY、SECRET_KEY 这三个值需要自行到百度智能云申请；我提供的这几个值已经过处理，不能直接使用）

问题相关代码

import base64
import os
import pandas as pd
from aip import AipOcr
import requests
import openpyxl

def findAllFile(base):
    """Yield the path of every file found under ``base``, recursively.

    Args:
        base: Directory to walk.

    Yields:
        One path per file, built by joining the directory in which
        ``os.walk`` found the file with the file's name.
    """
    for root, _dirs, files in os.walk(base):
        for name in files:
            # Join with ``root`` rather than ``base``: os.walk descends into
            # subdirectories, so ``base + name`` would point at a path that
            # does not exist for nested files, and would also be wrong when
            # ``base`` has no trailing separator.
            yield os.path.join(root, name)

if __name__ == '__main__':
    # Credentials passed to the OCR client.
    APP_ID ='25900060'
    API_KEY ='c6hZAkhjgkim2o8peTK1'
    SECRET_KEY ='B4mzBDkhkvhhjhjkY92H5AZh35BSnQ8'
    # Create the client object
    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
    # One list per output column, filled while looping over the images.
    names = []
    genders = []
    nations = []
    births = []
    address = []
    ids = []

    # Up to 10 images can be processed concurrently
    base = 'idcards/'      # a folder next to this .py file that holds the ID-card images
    for i in findAllFile(base):
        # Read the image bytes; the context manager closes the file handle
        # right away instead of leaving one open per image.
        with open(i, "rb") as image_file:
            fp = image_file.read()
        # 'front' is the card-side argument passed to the recognition call.
        res = client.idcard(fp,'front')
        print(res)

运行结果及报错内容

{'words_result': {'姓名': {'location': {'top': 42, 'left': 61, 'width': 34, 'height': 13}, 'words': '甘绍'}, '民族': {'location': {'top': 65, 'left': 111, 'width': 9, 'height': 11}, 'words': '壮'}, '住址': {'location': {'top': 105, 'left': 58, 'width': 107, 'height': 27}, 'words': '南宁市青秀区云景路382号'}, '公民身份号码': {'location': {'top': 156, 'left': 97, 'width': 134, 'height': 15}, 'words': '452128090008053052'}, '出生': {'location': {'top': 84, 'left': 57, 'width': 85, 'height': 13}, 'words': '1989875'}, '性别': {'location': {'top': 64, 'left': 61, 'width': 9, 'height': 11}, 'words': '男'}}, 'idcard_number_type': 1, 'words_result_num': 6, 'image_status': 'normal', 'log_id': 1484889181992238307}

我的解答思路和尝试过的方法

原先想通过下面方法提取想要的内容，并输出到表格中，但是发现该方法不可行，因为读取出来的字符串内容无法匹配到相关内容。

# Iterate over the recognition result
        # NOTE(review): in the printed result shown in this post,
        # res["words_result"] is a dict keyed by field name ("姓名", "性别",
        # ...), so the loop below iterates over those key strings and
        # tex["words"] indexes a str with a str, which raises TypeError.
        # Each value would be read as res["words_result"][<field>]["words"].
        addr = ""
        for tex in res["words_result"]:
            row = tex["words"]
            if "姓名" in row:
                names.append(row[2:])
            elif "性别" in row:
                genders.append(row[2:3])
                nations.append(row[5:])
            elif "出生" in row:
                births.append(row[2:])
            elif "住址" in row:
                addr += row[2:]
            # NOTE(review): the key in the printed result is "公民身份号码"
            # (without "证"), so this literal does not match it.
            elif "公民身份证号码" in row:
                ids.append(row[7:])
            else:
                addr += row
        address.append(addr)
    # Build one row per processed image and write the table to an Excel file.
    df = pd.DataFrame({"姓名": names, "性别": genders, "民族": nations,
                       "出生": births, "住址": address, "身份证号码": ids})
    df.to_excel('idcards.xlsx', index=False)

下面是百度智能云身份证识别文档中关于返回内容的示例。网址： https://cloud.baidu.com/doc/OCR/s/rk3h7xzck

{
    "log_id": 2648325511,
    "direction": 0,
    "image_status": "normal",
    "photo": "/9j/4AAQSkZJRgABA......",
    "photo_location": {
        "width": 1189,
        "top": 638,
        "left": 2248,
        "height": 1483
    },
    "card_image": "/9j/4AAQSkZJRgABA......",
    "card_location": {
        "top": 328,
        "left": 275,
        "width": 1329,
        "height": 571
    },
    "words_result": {
        "住址": {
            "location": {
                "left": 267,
                "top": 453,
                "width": 459,
                "height": 99
            },
            "words": "南京市江宁区弘景大道3889号"
        },
        "公民身份号码": {
            "location": {
                "left": 443,
                "top": 681,
                "width": 589,
                "height": 45
            },
            "words": "330881199904173914"
        },
        "出生": {
            "location": {
                "left": 270,
                "top": 355,
                "width": 357,
                "height": 45
            },
            "words": "19990417"
        },
        "姓名": {
            "location": {
                "left": 267,
                "top": 176,
                "width": 152,
                "height": 50
            },
            "words": "伍云龙"
        },
        "性别": {
            "location": {
                "left": 269,
                "top": 262,
                "width": 33,
                "height": 52
            },
            "words": "男"
        },
        "民族": {
            "location": {
                "left": 492,
                "top": 279,
                "width": 30,
                "height": 37
            },
            "words": "汉"
        }
    },
    "words_result_num": 6
}

我想要达到的结果

我想要从中读取出姓名、性别、民族等字段的内容，最后通过某个函数输出到excel表格中。

共2条回复我来回复

代码向导

这个人很懒，什么都没有留下～

你题目的解答代码如下：

import pandas as pd

# Sample recognition response; the per-field text sits under
# res["words_result"][<field name>]["words"].
res = {
    "log_id": 2648325511,
    "direction": 0,
    "image_status": "normal",
    "photo": "/9j/4AAQSkZJRgABA......",
    "photo_location": {"width": 1189, "top": 638, "left": 2248, "height": 1483},
    "card_image": "/9j/4AAQSkZJRgABA......",
    "card_location": {"top": 328, "left": 275, "width": 1329, "height": 571},
    "words_result": {
        "住址": {
            "location": {"left": 267, "top": 453, "width": 459, "height": 99},
            "words": "南京市江宁区弘景大道3889号",
        },
        "公民身份号码": {
            "location": {"left": 443, "top": 681, "width": 589, "height": 45},
            "words": "330881199904173914",
        },
        "出生": {
            "location": {"left": 270, "top": 355, "width": 357, "height": 45},
            "words": "19990417",
        },
        "姓名": {
            "location": {"left": 267, "top": 176, "width": 152, "height": 50},
            "words": "伍云龙",
        },
        "性别": {
            "location": {"left": 269, "top": 262, "width": 33, "height": 52},
            "words": "男",
        },
        "民族": {
            "location": {"left": 492, "top": 279, "width": 30, "height": 37},
            "words": "汉",
        },
    },
    "words_result_num": 6,
}


# One list per output column. Every processed response must add exactly one
# entry to each list; otherwise the columns have different lengths and
# pd.DataFrame cannot build the table.
names = []
genders = []
nations = []
births = []
address = []
ids = []

# "words_result" maps each field name to a dict whose "words" entry holds the
# recognized text.
tex = res["words_result"]
for key, column in (
    ("姓名", names),
    ("性别", genders),
    ("民族", nations),
    ("出生", births),
    ("住址", address),
    ("公民身份号码", ids),
):
    # A field missing from the response becomes an empty cell instead of
    # being skipped, which keeps all columns the same length.
    column.append(tex.get(key, {}).get("words", ""))

df = pd.DataFrame({"姓名": names, "性别": genders, "民族": nations, "出生": births, "住址": address, "身份证号码": ids})
print(df)
# Write the table to an Excel file without the index column.
df.to_excel('idcards.xlsx', index=False)

0条评论

毕设小屋
这个人很懒，什么都没有留下～
评论
我刚刚用 print(type(res)) 查看了res的数据类型为：dict 输出结果如下：

<class 'dict'>

但是这个字典感觉比普通的字典要复杂,下面几种办法都取不出我想要的内容（取出姓名对应的内容：甘绍）

# NOTE(review): in the result printed earlier in this post, '姓名' is nested
# under res['words_result'], not at the top level of res, so this lookup
# finds no such key: setdefault inserts '姓名': None into res and returns
# None. The name would be read as res['words_result']['姓名']['words'].
print(res.setdefault('姓名',None))
0条评论

发表回复

登录后才能评论