一、功能介绍
对图像中的阿拉伯数字进行识别提取,适用于快递单号、手机号、充值码提取等场景。
二、应用场景
快递面单识别
使用数字识别技术,对快递面单、物流单据、外卖小票中的电话号码进行识别和提取,大幅度提升收货人信息的录入效率,方便进行收件通知,同时可识别纯数字形式的快递三段码,有效提升快件分拣速度。
仪表读数识别
使用数字识别技术,对各类仪器仪表的读数进行识别和提取,可应用于对仪器仪表读数具有定时记录、数据统计、实时监控等需求的场景,有效降低人工录入成本,控制仪器使用风险。
三、使用攻略
说明:本文采用 C# 语言,开发环境为 .NET Core 2.2,采用在线 API 接口方式实现,需要用到 SixLabors.ImageSharp 和 SixLabors.ImageSharp.Drawing 两个 NuGet 程序包来对图片进行画框标识。
(1)平台接入
登录 百度智能云-管理中心,创建“文字识别”应用,获取“API Key”和“Secret Key”:https://console.bce.baidu.com/ai/?_=1574823891186#/ai/ocr/overview/index
(2)接口文档
文档地址:https://ai.baidu.com/docs#/OCR-API-Numbers/top
接口描述:
对图像中的阿拉伯数字进行识别提取,适用于快递单号、手机号、充值码提取等场景。
请求说明
HTTP方法:POST
请求URL:https://aip.baidubce.com/rest/2.0/ocr/v1/numbers
URL参数:
Header如下:
Body中放置请求参数,参数详情如下:
请求参数
返回说明
返回参数
返回示例:
{
"log_id": 620759800,
"words_result": [
{
"location": {
"left": 56,
"top": 0,
"width": 21,
"height": 210
},
"words": "3"
}
],
"words_result_num": 1
}
(3)源码共享
(3-1)根据 API Key 和 Secret Key 获取 AccessToken
/// <summary>
/// Requests a Baidu access_token with the OAuth client-credentials grant.
/// </summary>
/// <param name="clientId">API Key of the Baidu application.</param>
/// <param name="clientSecret">Secret Key of the Baidu application.</param>
/// <returns>The value of the <c>access_token</c> field of the token response.</returns>
/// <exception cref="InvalidOperationException">
/// The token response does not contain an <c>access_token</c> field.
/// </exception>
public static string GetAccessToken(string clientId, string clientSecret)
{
    string authHost = "https://aip.baidubce.com/oauth/2.0/token";
    var paraList = new List<KeyValuePair<string, string>>
    {
        new KeyValuePair<string, string>("grant_type", "client_credentials"),
        new KeyValuePair<string, string>("client_id", clientId),
        new KeyValuePair<string, string>("client_secret", clientSecret)
    };

    // The method is synchronous by contract, so the async calls are blocked on with
    // .Result exactly as before; the disposables are now released deterministically.
    using (HttpClient client = new HttpClient())
    using (FormUrlEncodedContent content = new FormUrlEncodedContent(paraList))
    using (HttpResponseMessage response = client.PostAsync(authHost, content).Result)
    {
        string result = response.Content.ReadAsStringAsync().Result;
        JObject jo = (JObject)JsonConvert.DeserializeObject(result);
        JToken token = jo["access_token"];
        if (token == null)
        {
            // Surface the raw response instead of failing with a NullReferenceException.
            throw new InvalidOperationException("Baidu token response contains no access_token: " + result);
        }
        return token.ToString();
    }
}
(3-2)调用API接口获取识别结果
(3-2-1)在 Startup.cs 文件的 Configure(IApplicationBuilder app, IHostingEnvironment env) 方法中开启虚拟目录映射功能:
// Serve the upload folder (wwwroot/Uploads/BaiduAIs) under the request path /BaiduAIs.
string webRootPath = HostingEnvironment.WebRootPath; // wwwroot directory
string baiduAIsDir = Path.Combine(webRootPath, "Uploads", "BaiduAIs");
// PhysicalFileProvider throws DirectoryNotFoundException when its root does not exist,
// so create the folder up front (CreateDirectory is a no-op for an existing folder).
Directory.CreateDirectory(baiduAIsDir);
app.UseStaticFiles(new StaticFileOptions
{
    FileProvider = new PhysicalFileProvider(baiduAIsDir),
    RequestPath = "/BaiduAIs"
});
(3-2-2) 建立Index.cshtml文件
(3-2-2-1)前台代码:
由于html代码无法原生显示,只能简单说明一下:
主要是一个form表单,需要设置属性enctype="multipart/form-data",否则无法上传图片;
form表单里面有几个控件:
一个Input:type="file",asp-for="FileUpload" ,上传图片;
一个Input:type="submit",asp-page-handler="Numbers" ,提交请求。
一个img:src="@Model.curPath",显示识别处理后的图片。
最后显示后台 msg 字符串列表信息,如果需要输出原始Html代码,则需要使用@Html.Raw()函数。
(3-2-2-2) 后台代码:
主程序代码:
/// <summary>Image file posted by the upload form.</summary>
[BindProperty]
public IFormFile FileUpload { get; set; }

/// <summary>Image URL bound from the form.</summary>
[BindProperty]
public string ImageUrl { get; set; }

// Hosting environment; used to resolve the wwwroot directory.
private readonly IHostingEnvironment HostingEnvironment;

/// <summary>Messages shown on the page after a recognition request.</summary>
public List<string> msg = new List<string>();

/// <summary>URL of the annotated image displayed on the page.</summary>
public string curPath { get; set; }

// Upload folder, relative to wwwroot.
string BaiduAI_OCRPath="Uploads//BaiduAIs//";
// Request path under which the upload folder is served.
string BaiduAI_OCRUrl="/BaiduAIs/";
// NOTE(review): credentials are hard-coded placeholders; load real keys from
// configuration or a secret store instead of committing them to source.
string OCR_API_KEY="你的API KEY";
string OCR_SECRET_KEY="你的SECRET KEY";

/// <summary>
/// Creates the page model.
/// </summary>
/// <param name="hostingEnvironment">Hosting environment supplied by dependency injection.</param>
public OCRSearchModel(IHostingEnvironment hostingEnvironment)
{
    HostingEnvironment = hostingEnvironment;
}
/// <summary>
/// Handles the "Numbers" form post: stores the uploaded image, sends it to the number
/// recognition API, lists the recognised text in <c>msg</c> and draws a box around every
/// recognised character on a copy of the image.
/// </summary>
/// <returns>
/// The current page. On success <c>curPath</c> points to the annotated image; on failure
/// <c>msg</c> contains the raw API response and the error message.
/// </returns>
public async Task<IActionResult> OnPostNumbersAsync()
{
    if (FileUpload is null)
    {
        ModelState.AddModelError(string.Empty, "请先选择需要识别的图片!");
    }
    if (!ModelState.IsValid)
    {
        return Page();
    }

    msg = new List<string>();
    string webRootPath = HostingEnvironment.WebRootPath; // wwwroot directory
    string fileDir = Path.Combine(webRootPath, BaiduAI_OCRPath);
    string imgName = await UploadFile(FileUpload, fileDir);
    string fileName = Path.Combine(fileDir, imgName);
    string imgBase64 = GetFileBase64(fileName);

    // Declared outside the try block so the catch handler can report the raw response.
    string result = string.Empty;
    try
    {
        // Stopwatch measures elapsed time independently of wall-clock adjustments.
        var timer = System.Diagnostics.Stopwatch.StartNew();
        result = GetOCRJson(imgBase64, OCR_API_KEY, OCR_SECRET_KEY);
        timer.Stop();
        TimeSpan ts = timer.Elapsed;

        JObject jo = (JObject)JsonStringToObj(result);
        JToken wordsResult = jo["words_result"];
        if (wordsResult == null)
        {
            // No result list in the response: show the response as-is.
            msg.Add(result);
            return Page();
        }

        List<JToken> msgList = wordsResult.ToList();
        int number = msgList.Count;
        int curNumber = 1;
        msg.Add("数字识别结果(耗时" + ts.TotalSeconds + "秒):");
        msg.Add("识别信息数(共" + number + "条):");

        List<Rectangle> recList = new List<Rectangle>();
        foreach (JToken ms in msgList)
        {
            if (number > 1)
            {
                msg.Add("第 " + curNumber.ToString() + " 条:");
            }

            string words = ms["words"].ToString();
            Rectangle wordBox = ReadLocation(ms["location"]);
            msg.Add(words + "(" + wordBox.X + "," + wordBox.Y + "," + wordBox.Width + "," + wordBox.Height + ")");

            // "chars" is requested via recognize_granularity=small in GetOCRJson;
            // skip the per-character section when the response does not carry it.
            JToken chars = ms["chars"];
            if (chars != null)
            {
                msg.Add("单字符位置:");
                foreach (JToken cr in chars)
                {
                    string car = cr["char"].ToString();
                    Rectangle rec = ReadLocation(cr["location"]);
                    msg.Add(car + "(" + rec.X + "," + rec.Y + "," + rec.Width + "," + rec.Height + ")");
                    recList.Add(rec);
                }
            }
            curNumber++;
        }

        // Draw the character boxes onto a copy of the uploaded image and publish its URL.
        imgName = GetRandomName() + ".png";
        string imgSavedPath = Path.Combine(webRootPath, BaiduAI_OCRPath, imgName);
        await DrawPolygon(fileName, imgSavedPath, recList);
        curPath = Path.Combine(BaiduAI_OCRUrl, imgName);
    }
    catch (Exception e1)
    {
        msg.Add(result + ":" + e1.Message);
    }
    return Page();
}

// Converts a "location" JSON object (left/top/width/height) into a Rectangle.
// The explicit JToken-to-float conversion does not depend on the current culture.
private static Rectangle ReadLocation(JToken location)
{
    return new Rectangle(
        (float)location["left"],
        (float)location["top"],
        (float)location["width"],
        (float)location["height"]);
}
其他相关函数:
/// <summary>
/// Sends a base64-encoded image to the Baidu number recognition endpoint and returns the
/// raw JSON response.
/// </summary>
/// <param name="strbaser64">Base64 encoding of the image.</param>
/// <param name="clientId">API Key.</param>
/// <param name="clientSecret">Secret Key.</param>
/// <returns>The response body as a string.</returns>
public static string GetOCRJson( string strbaser64, string clientId, string clientSecret)
{
    string token = GetAccessToken(clientId, clientSecret);
    string host = "https://aip.baidubce.com/rest/2.0/ocr/v1/numbers?access_token=" + token;

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(host);
    request.Method = "POST";
    request.ContentType = "application/x-www-form-urlencoded";
    request.KeepAlive = true;

    // recognize_granularity=small asks the service for per-character positions.
    string str = "image=" + HttpUtility.UrlEncode(strbaser64) + "&recognize_granularity=small";

    // The form body is ASCII after URL encoding; UTF-8 is used explicitly because
    // Encoding.Default differs between platforms.
    byte[] buffer = Encoding.UTF8.GetBytes(str);
    request.ContentLength = buffer.Length;
    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(buffer, 0, buffer.Length);
    }

    // NOTE(review): the response is assumed to be UTF-8 encoded JSON - confirm against the API docs.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Requests a Baidu access_token with the OAuth client-credentials grant.
/// </summary>
/// <param name="clientId">API Key of the Baidu application.</param>
/// <param name="clientSecret">Secret Key of the Baidu application.</param>
/// <returns>The value of the <c>access_token</c> field of the token response.</returns>
/// <exception cref="InvalidOperationException">
/// The token response does not contain an <c>access_token</c> field.
/// </exception>
public static string GetAccessToken(string clientId, string clientSecret)
{
    string authHost = "https://aip.baidubce.com/oauth/2.0/token";
    var paraList = new List<KeyValuePair<string, string>>
    {
        new KeyValuePair<string, string>("grant_type", "client_credentials"),
        new KeyValuePair<string, string>("client_id", clientId),
        new KeyValuePair<string, string>("client_secret", clientSecret)
    };

    // The method is synchronous by contract, so the async calls are blocked on with
    // .Result exactly as before; the disposables are now released deterministically.
    using (HttpClient client = new HttpClient())
    using (FormUrlEncodedContent content = new FormUrlEncodedContent(paraList))
    using (HttpResponseMessage response = client.PostAsync(authHost, content).Result)
    {
        string result = response.Content.ReadAsStringAsync().Result;
        JObject jo = (JObject)JsonConvert.DeserializeObject(result);
        JToken token = jo["access_token"];
        if (token == null)
        {
            // Surface the raw response instead of failing with a NullReferenceException.
            throw new InvalidOperationException("Baidu token response contains no access_token: " + result);
        }
        return token.ToString();
    }
}
/// <summary>
/// Produces a file name stem from a newly generated GUID.
/// </summary>
/// <returns>The GUID formatted with the "N" specifier (32 hexadecimal digits, no hyphens).</returns>
public static string GetRandomName() => Guid.NewGuid().ToString("N");
/// <summary>
/// Reads a file and returns its content as a base64 string.
/// </summary>
/// <param name="fileName">Absolute path of the file to read.</param>
/// <returns>Base64 encoding of the complete file content.</returns>
public static String GetFileBase64(string fileName)
{
    // File.ReadAllBytes opens the file for reading only, reads until the end of the
    // file and closes the handle even when reading fails.
    byte[] content = File.ReadAllBytes(fileName);
    return Convert.ToBase64String(content);
}
/// <summary>
/// Deserializes a JSON string into an object.
/// </summary>
/// <param name="jsonString">JSON text to deserialize.</param>
/// <returns>Whatever <c>JsonConvert.DeserializeObject</c> returns for the given text.</returns>
public static Object JsonStringToObj(string jsonString) => JsonConvert.DeserializeObject(jsonString);
/// <summary>
/// Saves an uploaded file under a newly generated name and returns that name.
/// </summary>
/// <param name="formFile">The uploaded file.</param>
/// <param name="fileDir">Absolute path of the target directory; created when missing.</param>
/// <returns>The generated file name (GUID plus the original extension), without directory.</returns>
public static async Task<string> UploadFile(IFormFile formFile, string fileDir)
{
    // Directory.CreateDirectory does nothing when the directory already exists,
    // so no separate existence check is needed.
    Directory.CreateDirectory(fileDir);

    // Only the extension of the client-supplied name is kept; the stem is a fresh GUID.
    string extension = Path.GetExtension(formFile.FileName);
    string imgName = Guid.NewGuid().ToString("N") + extension;
    var filePath = Path.Combine(fileDir, imgName);
    using (var fileStream = new FileStream(filePath, FileMode.Create, FileAccess.Write))
    {
        await formFile.CopyToAsync(fileStream);
    }
    return imgName;
}
/// <summary>
/// Draws the outline of every rectangle onto the source image and saves the result.
/// </summary>
/// <param name="originalPath">Path of the source image.</param>
/// <param name="targetPath">Path the annotated image is written to.</param>
/// <param name="recList">Rectangles to outline; each one carries its own colour and line thickness.</param>
/// <returns>A task that is already complete when the image has been saved.</returns>
// The body is synchronous; the async modifier is kept so that the signature and the
// way failures reach callers stay the same.
public static async Task DrawPolygon(string originalPath, string targetPath, List<Rectangle> recList)
{
    // var keeps the concrete image type returned by Image.Load.
    using (var image = Image.Load(originalPath))
    {
        // One Mutate call for all rectangles instead of one call per rectangle.
        image.Mutate(x =>
        {
            foreach (Rectangle rec in recList)
            {
                x.DrawPolygon(
                    rec.LineColor,
                    rec.Thinkness,
                    rec.point1, rec.point2, rec.point3, rec.point4);
            }
        });
        image.Save(targetPath);
    }
}
矩形类:
/// <summary>
/// Axis-aligned rectangle together with the colour and thickness of its outline.
/// </summary>
public class Rectangle
{
    /// <summary>Creates a rectangle with all members left at their default values.</summary>
    public Rectangle()
    {
    }

    /// <summary>
    /// Creates a rectangle with a red outline of thickness 1.
    /// </summary>
    /// <param name="x">X coordinate of the top-left corner.</param>
    /// <param name="y">Y coordinate of the top-left corner.</param>
    /// <param name="width">Width.</param>
    /// <param name="height">Height.</param>
    public Rectangle(float x, float y, float width, float height)
    {
        X = x;
        Y = y;
        Width = width;
        Height = height;
        LineColor = Color.Red;
        Thinkness = 1;
    }

    /// <summary>X coordinate of the top-left corner.</summary>
    [Display(Name = "X坐标")]
    public float X { get; set; }

    /// <summary>Y coordinate of the top-left corner.</summary>
    [Display(Name = "Y坐标")]
    public float Y { get; set; }

    /// <summary>Width.</summary>
    [Display(Name = "宽度")]
    public float Width { get; set; }

    /// <summary>Height.</summary>
    [Display(Name = "高度")]
    public float Height { get; set; }

    /// <summary>Outline colour.</summary>
    [Display(Name = "线条颜色")]
    public Color LineColor { get; set; }

    /// <summary>Outline thickness.</summary>
    [Display(Name = "线条厚度")]
    public float Thinkness { get; set; }

    /// <summary>Top-left corner.</summary>
    [Display(Name = "上左点坐标")]
    public Vector2 point1 => new Vector2(X, Y);

    /// <summary>Top-right corner.</summary>
    [Display(Name = "上右点坐标")]
    public Vector2 point2 => new Vector2(X + Width, Y);

    /// <summary>Bottom-right corner.</summary>
    [Display(Name = "下右点坐标")]
    public Vector2 point3 => new Vector2(X + Width, Y + Height);

    /// <summary>Bottom-left corner.</summary>
    [Display(Name = "下左点坐标")]
    public Vector2 point4 => new Vector2(X, Y + Height);
}
四、效果测试
1、页面:
2、识别结果:
2.1
2.2
2.3
2.4
2.5
五、测试结果及建议
从上述的测试结果可以发现,百度的《数字识别》AI技术整体功能还是不错的,基本上可以准确识别出图片中的数字内容。
不过识别速度跟图片的文字内容多少有关,如果图片文字比较多(图2.4),那识别速度会大大降低,虽然其中数字内容并不是很多,这方面还需要优化一下,如果能够根据数字和非数字的特性,直接过滤非数字内容,然后单独识别数字内容,这样应该能提高数字识别速度。
另外,还存在部分数字无法识别(漏识别)的情况(图2.1中的“25日多云间晴天”中的“25”没有被识别出来,图2.3的头部条形码下面的数字没有被识别出来),不知道是什么情况,可能存在识别盲区?这里还需要再查找优化一下。
此外,目前识别的结果,是根据“数字是否在同一行”的情况下,去区分当前图片中有多少条数字内容,这个区分统计不太好,如果可能,将其改进成根据“数字是否紧靠在一起”的规则将其区分统计成有多少条数字内容,这样的区分结果会更加符合人类的统计观念。
如果能将数字区分统计结果按照“数字是否紧靠在一起”的规则进行统计,那么数字识别这一AI技术完全可以将其运用到提取相关报告的关键数字上去,这样就大大增加了数字识别技术的运用领域了,目前的大部分报告内容,文字内容很多,但多是解释性语言,想要快速获取关键数字信息有点困难,如果能够将报告化成图片,然后快速识别其中的数字内容,这样就能快速掌握报告的关键点,大大提高报告阅读理解速度了。当然,如果能够直接根据文字内容,整理出报告的关键点,那么就更完美了。