-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathconfigure
More file actions
executable file
·224 lines (189 loc) · 7.68 KB
/
configure
File metadata and controls
executable file
·224 lines (189 loc) · 7.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#!/bin/sh
# configure script for RcppMeCab
# Detects MeCab installation or builds from source
# Locate mecab-config.
# A mecab-config reachable through PATH wins; otherwise probe the usual
# Homebrew prefixes on macOS (Apple Silicon first, then Intel).
# MECAB_CONFIG is left empty when nothing is found.
MECAB_CONFIG=""
if command -v mecab-config >/dev/null 2>&1; then
  MECAB_CONFIG="mecab-config"
else
  for prefix in /opt/homebrew /usr/local; do
    # Skip prefixes that do not ship an executable mecab-config.
    [ -x "$prefix/bin/mecab-config" ] || continue
    MECAB_CONFIG="$prefix/bin/mecab-config"
    break
  done
fi
# Resolve the MeCab compile/link flags.
#
# When mecab-config was located, its --cflags/--libs output is used as is.
# Otherwise a private static MeCab is downloaded, patched, built and installed
# under src/mecab_local, and a dictionary is placed in inst/dic.
#
# Inputs:  MECAB_CONFIG  name/path of mecab-config, or empty
#          MECAB_LANG    "ja" selects taku910/mecab + IPAdic; any other value
#                        (default "ko") selects mecab-ko + mecab-ko-dic
# Outputs: MECAB_CFLAGS, MECAB_LIBS
# Exits with status 1 if the source download, extraction, configure, build or
# install step fails. Dictionary problems only produce a WARNING.

# mecab_fetch URL DEST
# Download URL to DEST with curl, falling back to wget.
# Returns 0 only when a downloader reported success AND DEST is non-empty
# (wget -O leaves an empty file behind on failure, so existence alone proves
# nothing). A partial or empty DEST is removed before returning 1.
mecab_fetch() {
  if command -v curl >/dev/null 2>&1 &&
     curl -fsSL "$1" -o "$2" && [ -s "$2" ]; then
    return 0
  fi
  if command -v wget >/dev/null 2>&1 &&
     wget -q "$1" -O "$2" && [ -s "$2" ]; then
    return 0
  fi
  rm -f "$2"
  return 1
}

if [ -n "$MECAB_CONFIG" ]; then
  echo "Found mecab-config at: $MECAB_CONFIG"
  MECAB_CFLAGS=$("$MECAB_CONFIG" --cflags)
  MECAB_LIBS=$("$MECAB_CONFIG" --libs)
else
  echo "mecab-config not found. Building MeCab from source..."
  PKG_DIR="$(pwd)"
  MECAB_BUILD_DIR="${PKG_DIR}/src/mecab_build"
  MECAB_INSTALL_DIR="${PKG_DIR}/src/mecab_local"
  MECAB_LANG="${MECAB_LANG:-ko}"

  if [ "$MECAB_LANG" = "ja" ]; then
    MECAB_SRC_URL="https://github.com/taku910/mecab/archive/61b90ba6e669dc2d7d533d4a80d206f3b31d52b1.tar.gz"
    echo "Building MeCab (Japanese, taku910/mecab 0.996) from source..."
  else
    MECAB_SRC_URL="https://github.com/Pusnow/mecab-ko-msvc/archive/refs/tags/release-0.999.tar.gz"
    echo "Building MeCab-Ko (Korean, mecab-ko 0.999) from source..."
  fi

  MECAB_TARBALL="$MECAB_BUILD_DIR/mecab-source.tar.gz"
  mkdir -p "$MECAB_BUILD_DIR"

  echo "Downloading MeCab source..."
  if ! command -v curl >/dev/null 2>&1 && ! command -v wget >/dev/null 2>&1; then
    echo "ERROR: Neither curl nor wget found. Cannot download MeCab source." >&2
    exit 1
  fi
  if ! mecab_fetch "$MECAB_SRC_URL" "$MECAB_TARBALL"; then
    echo "ERROR: Failed to download MeCab source." >&2
    exit 1
  fi

  echo "Extracting MeCab source..."
  tar xzf "$MECAB_TARBALL" -C "$MECAB_BUILD_DIR" --strip-components=1 || {
    echo "ERROR: Failed to extract MeCab source." >&2
    exit 1
  }

  # taku910/mecab extracts to mecab_build/mecab/; mecab-ko-msvc extracts flat
  if [ -d "$MECAB_BUILD_DIR/mecab" ] && [ -f "$MECAB_BUILD_DIR/mecab/configure" ]; then
    MECAB_SRC_DIR="$MECAB_BUILD_DIR/mecab"
  else
    MECAB_SRC_DIR="$MECAB_BUILD_DIR"
  fi
  if [ ! -f "$MECAB_SRC_DIR/configure" ]; then
    echo "ERROR: MeCab configure script not found at $MECAB_SRC_DIR/configure" >&2
    exit 1
  fi

  # mecab-ko-msvc configure expects src/Makefile.msvc.in; create stub if missing
  if [ ! -f "$MECAB_SRC_DIR/src/Makefile.msvc.in" ]; then
    touch "$MECAB_SRC_DIR/src/Makefile.msvc.in"
  fi

  # Patch MeCab source for R compatibility:
  # Avoid exit(), stdout, stderr, printf, sprintf which R CMD check forbids.
  echo "Patching MeCab source for R compatibility..."
  SRCDIR="$MECAB_SRC_DIR/src"

  # Create a compatibility header providing a null output stream; the loop
  # below rewrites std::cerr / std::cout to it. The heredoc body and its
  # terminator must stay at column 0 (plain '<<', quoted delimiter).
  cat > "$SRCDIR/mecab_r_compat.h" << 'COMPAT_EOF'
#ifndef MECAB_R_COMPAT_H_
#define MECAB_R_COMPAT_H_
#include <iostream>
#include <cstdio>
// Null output stream that inherits from std::ostream for full compatibility
class MeCabNullBuf : public std::streambuf {
protected:
int overflow(int c) { return c; }
};
static MeCabNullBuf mecab_null_buf_;
static std::ostream mecab_null_os_(&mecab_null_buf_);
#endif
COMPAT_EOF

  # Insert the compat header include as line 1 of every source/header file and
  # replace std::cerr/std::cout. 'sed -i.bak' is the form accepted by both GNU
  # and BSD sed; the backups are removed afterwards.
  for f in "$SRCDIR"/*.cpp "$SRCDIR"/*.h; do
    case "$f" in */mecab_r_compat.h) continue;; esac
    sed -i.bak '1i\
#include "mecab_r_compat.h"
s/std::cerr/mecab_null_os_/g; s/std::cout/mecab_null_os_/g' "$f"
  done

  # Per-file patches (run once each, after the loop)
  # common.h: remove exit() from die class destructor
  # utils.h: sprintf -> snprintf
  # utils.cpp: gut progress_bar body to avoid printf/stdout
  # eval.cpp: sprintf -> snprintf
  sed -i.bak '/^class die/,/^};/ { /exit(-1);/d; }' "$SRCDIR/common.h"
  sed -i.bak 's/std::sprintf(s, "%-16f", val)/std::snprintf(s, sizeof(s), "%-16f", val)/' "$SRCDIR/utils.h"
  sed -i.bak '/^int progress_bar/,/^}/ { /^int progress_bar/!{ /^}/!d; }; /^int progress_bar/a\
return 1;
}' "$SRCDIR/utils.cpp"
  sed -i.bak 's/sprintf(buf\.get(),/snprintf(buf.get(), 256,/' "$SRCDIR/eval.cpp"
  rm -f "$SRCDIR"/*.bak

  echo "Configuring MeCab..."
  cd "$MECAB_SRC_DIR" || {
    echo "ERROR: Cannot change directory to $MECAB_SRC_DIR" >&2
    exit 1
  }

  # MeCab source uses 'register' keyword which is an error in C++17;
  # force C++14 and suppress deprecation warnings for the MeCab build.
  # CXX/CXXFLAGS are overridden for this one command so that R's C++17
  # standard does not leak into the MeCab build.
  CXX="c++" CXXFLAGS="-std=c++14 -Wno-deprecated-declarations -O2 -fPIC" \
    ./configure --with-charset=utf8 --enable-utf8-only \
      --prefix="$MECAB_INSTALL_DIR" --enable-static --disable-shared \
      --with-pic > mecab_configure.log 2>&1 || {
    echo "ERROR: MeCab configure failed. See $MECAB_SRC_DIR/mecab_configure.log" >&2
    exit 1
  }

  echo "Building MeCab..."
  make > mecab_make.log 2>&1 || {
    echo "ERROR: MeCab build failed. See $MECAB_SRC_DIR/mecab_make.log" >&2
    exit 1
  }

  echo "Installing MeCab locally..."
  make install > mecab_install.log 2>&1 || {
    echo "ERROR: MeCab install failed. See $MECAB_SRC_DIR/mecab_install.log" >&2
    exit 1
  }

  cd "$PKG_DIR" || {
    echo "ERROR: Cannot change directory to $PKG_DIR" >&2
    exit 1
  }

  # Download and install dictionary into inst/dic/
  MECAB_DIC_DIR="${PKG_DIR}/inst/dic"
  mkdir -p "$MECAB_DIC_DIR"

  if [ "$MECAB_LANG" = "ja" ]; then
    echo "Compiling IPAdic (Japanese) dictionary..."
    DIC_BUILD_DIR="${PKG_DIR}/src/dic_build"
    mkdir -p "$DIC_BUILD_DIR"

    # The taku910/mecab archive fetched above also carries mecab-ipadic/ and
    # is still on disk (the build directory is only removed at the very end),
    # so reuse it; download again only if it has gone missing.
    DIC_TARBALL="$MECAB_TARBALL"
    if [ ! -s "$DIC_TARBALL" ]; then
      DIC_TARBALL="$DIC_BUILD_DIR/mecab-source.tar.gz"
      mecab_fetch "$MECAB_SRC_URL" "$DIC_TARBALL"
    fi
    # A failed download/extraction surfaces below: mecab-dict-index fails on
    # the missing directory and the WARNING branch is taken.
    tar xzf "$DIC_TARBALL" -C "$DIC_BUILD_DIR" --strip-components=1

    IPADIC_DIR="$DIC_BUILD_DIR/mecab-ipadic"
    DICT_INDEX="$MECAB_INSTALL_DIR/libexec/mecab/mecab-dict-index"
    if [ ! -x "$DICT_INDEX" ]; then
      DICT_INDEX="$MECAB_INSTALL_DIR/bin/mecab-dict-index"
    fi

    if "$DICT_INDEX" \
         -d "$IPADIC_DIR" \
         -o "$MECAB_DIC_DIR" \
         -f euc-jp -t utf-8 > /dev/null 2>&1; then
      # Copy dicrc (needed at runtime) next to the compiled dictionary;
      # best-effort, errors are deliberately silenced.
      cp "$IPADIC_DIR/dicrc" "$MECAB_DIC_DIR/" 2>/dev/null
      echo "IPAdic dictionary compiled to: $MECAB_DIC_DIR"
    else
      echo "WARNING: Failed to compile IPAdic dictionary. Package will work but needs manual dictionary setup." >&2
    fi
    rm -rf "$DIC_BUILD_DIR"
  else
    echo "Downloading pre-compiled mecab-ko-dic..."
    DIC_URL="https://github.com/Pusnow/mecab-ko-msvc/releases/download/release-0.999/mecab-ko-dic.tar.gz"
    # Keep the tarball inside the package-private build directory instead of
    # a predictable name in the world-writable /tmp.
    DIC_TARBALL="$MECAB_BUILD_DIR/mecab-ko-dic.tar.gz"
    if mecab_fetch "$DIC_URL" "$DIC_TARBALL"; then
      # Only claim success when the archive actually unpacked.
      if tar xzf "$DIC_TARBALL" -C "$MECAB_DIC_DIR" --strip-components=1; then
        echo "mecab-ko-dic installed to: $MECAB_DIC_DIR"
      else
        echo "WARNING: Failed to extract mecab-ko-dic. Package will work but needs manual dictionary setup." >&2
      fi
      rm -f "$DIC_TARBALL"
    else
      echo "WARNING: Failed to download mecab-ko-dic. Package will work but needs manual dictionary setup." >&2
    fi
  fi

  # Clean up build artifacts (only keep installed lib and headers)
  rm -rf "$MECAB_BUILD_DIR"

  MECAB_CFLAGS="-I${MECAB_INSTALL_DIR}/include"
  MECAB_LIBS="-L${MECAB_INSTALL_DIR}/lib -lmecab"
  echo "MeCab built successfully at: $MECAB_INSTALL_DIR"
fi
# Report the MeCab flags, query RcppParallel for its linker flags, and
# generate src/Makevars from the src/Makevars.in template.
#
# Reads:  MECAB_CFLAGS, MECAB_LIBS, R_HOME
# Writes: RCPPPARALLEL_LIBS, src/Makevars

# mecab_sed_escape STRING
# Print STRING with backslash, '&' and '|' backslash-escaped, so it can be
# used verbatim as the replacement text of a sed 's|...|...|' command.
# Unescaped, '&' would expand to the matched placeholder and '|' would
# terminate the command early.
mecab_sed_escape() {
  printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
}

# mecab_write_makevars TEMPLATE OUTPUT
# Copy TEMPLATE to OUTPUT, substituting the first @MECAB_CFLAGS@, @MECAB_LIBS@
# and @RCPPPARALLEL_LIBS@ on each line with the values of the same-named
# shell variables. Returns sed's exit status.
mecab_write_makevars() {
  sed -e "s|@MECAB_CFLAGS@|$(mecab_sed_escape "$MECAB_CFLAGS")|" \
      -e "s|@MECAB_LIBS@|$(mecab_sed_escape "$MECAB_LIBS")|" \
      -e "s|@RCPPPARALLEL_LIBS@|$(mecab_sed_escape "$RCPPPARALLEL_LIBS")|" \
      "$1" > "$2"
}

echo " MECAB_CFLAGS: $MECAB_CFLAGS"
echo " MECAB_LIBS: $MECAB_LIBS"

# Get RcppParallel linker flags.
# If R_HOME is not set in the environment (e.g. the script is run by hand),
# ask R for it, as Writing R Extensions recommends for configure scripts.
: "${R_HOME:=$(R RHOME 2>/dev/null)}"
RCPPPARALLEL_LIBS=$("${R_HOME}/bin/Rscript" -e "cat(RcppParallel::RcppParallelLibs())")
echo " RCPPPARALLEL_LIBS: $RCPPPARALLEL_LIBS"

# Generate src/Makevars from src/Makevars.in
mecab_write_makevars src/Makevars.in src/Makevars
echo "Configuration complete."