1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
|
# Copyright 1999-2017 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# EAPI level this ebuild is written against.
EAPI=6

# Upstream GitHub organisation name; used to build the download URLs below.
MY_PN="tesseract-ocr"
# Ref of the tessdata repository the trained data files are fetched from.
LANGPACKV="4.00"
URI_PREFIX="https://github.com/${MY_PN}/tessdata/raw/${LANGPACKV}/"

# USE flag that java-pkg-opt-2 treats as the switch for optional Java support.
JAVA_PKG_OPT_USE="scrollview"

# eutils is inherited explicitly because src_install calls
# prune_libtool_files; do not rely on another eclass pulling it in.
inherit autotools eutils java-pkg-opt-2 toolchain-funcs

DESCRIPTION="An OCR Engine, originally developed at HP, now open source."
HOMEPAGE="https://github.com/tesseract-ocr"

# Trained data files are renamed on download so that the stored file name
# carries the language pack version as a "-${LANGPACKV}" suffix.
# The English data is always fetched; equ and osd are optional.
SRC_URI="https://github.com/${MY_PN}/${PN}/archive/${PV/_}.tar.gz -> ${P}.tar.gz
	${URI_PREFIX}eng.traineddata -> eng.traineddata-${LANGPACKV}
	math? ( ${URI_PREFIX}equ.traineddata -> equ.traineddata-${LANGPACKV} )
	osd? ( ${URI_PREFIX}osd.traineddata -> osd.traineddata-${LANGPACKV} )"

LICENSE="Apache-2.0"
SLOT="0"
KEYWORDS="~alpha ~amd64 ~arm ~mips ~ppc ~ppc64 ~sparc ~x86"
IUSE="doc examples jpeg math opencl openmp osd png scrollview static-libs tiff training webp"
# Supported Gentoo l10n codes and the upstream trained-data names they map to.
# Format of each entry: <gentoo code>:<tesseract name>[,<tesseract name>...]
# https://github.com/tesseract-ocr/tesseract/wiki/Data-Files
# "old" variants are grouped under the matching modern locale.
LANGUAGES="af:afr am:amh ar:ara as:asm az:aze,aze_cyrl be:bel bn:ben bo:bod bs:bos bg:bul ca:cat cs:ces zh:chi_sim,chi_tra cy:cym da:dan de:deu,frk dz:dzo el:ell,grc en:enm eo:epo et:est eu:eus fa:fas fi:fin fr:fra,frm ga:gle gl:glg gu:guj he:heb hi:hin hr:hrv hu:hun id:ind is:isl it:ita,ita_old ja:jpn kn:kan ka:kat,kat_old kk:kaz km:khm ky:kir ko:kor ku:kur lo:lao la:lat lv:lav lt:lit ml:mal mr:mar mk:mkd ms:msa my:mya ne:nep nl:nld no:nor or:ori pa:pan pl:pol pt:por ro:ron ru:rus sa:san si:sin sk:slk sl:slv es:spa,spa_old sq:sqi sr:srp,srp_latn sw:swa sv:swe syc:syr ta:tam te:tel tg:tgk tl:tgl th:tha tr:tur ug:uig uk:ukr uz:uzb,uzb_cyrl vi:vie"

# Upstream data files without a matching entry above:
# ceb Cebuano
# chr Cherokee
# hat Haitian; Haitian Creole
# iku Inuktitut
# jav Javanese
# mlt Maltese
# pus Pushto; Pashto
# tir Tigrinya
# urd Urdu
# yid Yiddish

# l10n_en only adds extra data on top of the English file that SRC_URI
# always fetches:
# enm English, Middle (1100-1500)

# For every table entry, add one l10n_<code> USE flag and one conditional
# SRC_URI download per mapped trained-data file. Each download is renamed
# to carry a "-${LANGPACKV}" suffix.
for lang in ${LANGUAGES}; do
	gentoo_lang=${lang%:*}    # part before the colon
	tess_langs=${lang#*:}     # comma-separated list after the colon
	for tess_lang in ${tess_langs//,/ }; do
		SRC_URI+=" l10n_${gentoo_lang}? ( ${URI_PREFIX}${tess_lang}.traineddata -> ${tess_lang}.traineddata-${LANGPACKV} )"
	done
	IUSE+=" l10n_${gentoo_lang}"
done
# The loop runs in global scope; drop its scratch variables so they do not
# linger in the ebuild environment.
unset lang gentoo_lang tess_langs tess_lang
# Source directory: ${P} with its first underscore removed, under ${WORKDIR}.
S="${WORKDIR}/${P/_}"

# Documentation files listed for installation.
DOCS=(
	AUTHORS
	ChangeLog
	NEWS
	README.md
)

# Patches from FILESDIR. The piccolo2d patch is pinned to the 3.04.01 file
# name; the others are named after the current ${P}.
PATCHES=(
	"${FILESDIR}/${PN}-3.04.01-use-system-piccolo2d.patch"
	"${FILESDIR}/${P}-isnan.patch"
	"${FILESDIR}/${P}-openmp.patch"
	"${FILESDIR}/${P}-no_graphics.patch"
)

# Dependencies shared by build time and run time.
# With opencl, USE=tiff is necessary in leptonica.
CDEPEND=">=media-libs/leptonica-1.74:=[zlib,tiff?,jpeg?,png?,webp?]
opencl? (
virtual/opencl
media-libs/tiff:0=
media-libs/leptonica:=[tiff]
)
scrollview? (
>=dev-java/piccolo2d-3.0:0
)
training? (
dev-libs/icu:=
x11-libs/pango:=
x11-libs/cairo:=
)"

# Run time: common set plus a JRE when scrollview is enabled.
RDEPEND="${CDEPEND}
scrollview? ( >=virtual/jre-1.7 )"

# Build time: common set plus doxygen for USE=doc and a JDK for scrollview.
DEPEND="${CDEPEND}
doc? ( app-doc/doxygen )
scrollview? ( >=virtual/jdk-1.7 )"
# Early OpenMP check.
# Runs tc-check-openmp only when the package is not merged from a binary
# package and USE=openmp is enabled.
# Returns: the status of the last test or command that was evaluated
# (non-zero when the check is skipped).
pkg_pretend() {
	# Binary merges do not compile anything, so there is nothing to check.
	[[ ${MERGE_TYPE} != binary ]] || return 1
	# A bare "return" hands back the status of the failed "use" call.
	use openmp || return
	tc-check-openmp
}
# Setup phase.
# 1. Repeats the OpenMP check from pkg_pretend for source builds with
#    USE=openmp.
# 2. Chains to the pkg_setup exported by java-pkg-opt-2. Defining pkg_setup
#    here replaces the eclass default, so without this call the eclass's
#    setup for the optional Java (scrollview) support would be skipped.
#    src_prepare chains to the eclass in the same way.
# Returns: the status of java-pkg-opt-2_pkg_setup.
pkg_setup() {
	if [[ ${MERGE_TYPE} != binary ]] && use openmp; then
		tc-check-openmp
	fi

	java-pkg-opt-2_pkg_setup
}
# Unpack phase.
# Unpacks only the source tarball, then copies every fetched trained-data
# file from DISTDIR into ${S}/tessdata.
# Globals: P, A, DISTDIR, S (all read).
src_unpack() {
	local file

	unpack "${P}.tar.gz"

	# ${A} is deliberately unquoted: it is a whitespace-separated file list.
	for file in ${A}; do
		if [[ ${file} == *traineddata* ]]; then
			# SRC_URI stores these files with a "-<version>" suffix;
			# ${file%-*} strips it again to get the destination name.
			cp "${DISTDIR}/${file}" "${S}/tessdata/${file%-*}" \
				|| die "failed to copy ${file} into ${S}/tessdata"
		fi
	done
}
# Prepare phase. The three steps run in a fixed order:
#   1. the EAPI's default src_prepare,
#   2. eautoreconf (autotools eclass),
#   3. the src_prepare exported by java-pkg-opt-2, which this function
#      replaces and therefore has to call itself.
# Returns: the status of the last step.
src_prepare() {
	default
	eautoreconf
	java-pkg-opt-2_src_prepare
}
# Configure phase.
# Builds the configure argument list: shared libraries are always enabled,
# and each remaining switch is produced by use_enable for its USE flag.
# The scrollview flag drives the "graphics" switch and static-libs drives
# "static". The list is then passed to econf.
src_configure() {
	local -a conf_opts=( --enable-shared )

	# use_enable output is intentionally unquoted, as in a plain array
	# literal, so it undergoes normal word splitting.
	conf_opts+=( $(use_enable opencl) )
	conf_opts+=( $(use_enable openmp) )
	conf_opts+=( $(use_enable scrollview graphics) )
	conf_opts+=( $(use_enable static-libs static) )

	econf "${conf_opts[@]}"
}
# Compile phase.
# Runs the default build, then the optional extra make targets:
#   doc        -> "doc"
#   scrollview -> "ScrollView.jar", with JAVAC set to javac plus the
#                 arguments printed by java-pkg_javac-args
#   training   -> "training"
# Returns: the status of the training step, i.e. the failed "use" test when
# the flag is off, otherwise the status of emake.
src_compile() {
	default

	if use doc; then
		emake doc
	fi

	if use scrollview; then
		local javac_cmd
		javac_cmd="javac $(java-pkg_javac-args)"
		emake ScrollView.jar JAVAC="${javac_cmd}"
	fi

	# A bare "return" hands back the status of the failed "use" call.
	use training || return
	emake training
}
# Install phase.
# Runs the default install, prunes libtool files, then installs the optional
# pieces (training tools, example images) and finally the trained data and,
# with USE=scrollview, the ScrollView jar into /usr/share/tessdata.
# Globals: D, PF (read).
src_install() {
	# Declared local so the array does not leak into the global ebuild
	# environment. Functions called from here, including the default phase,
	# still see it through bash's dynamic scoping.
	if use doc; then
		local HTML_DOCS=( doc/html/. )
	fi

	default
	prune_libtool_files

	if use training; then
		emake DESTDIR="${D}" training-install
	fi

	if use examples; then
		insinto "/usr/share/doc/${PF}/examples"
		doins testing/eurotext.tif testing/phototest.tif
	fi

	insinto /usr/share/tessdata
	doins tessdata/*traineddata* # language files
	# An "if" instead of "use ... && ..." keeps the function from returning
	# a failure status merely because scrollview is disabled.
	if use scrollview; then
		doins java/ScrollView.jar # scrollview
	fi
}
|