# Shortcut: change into the brat v1.3 (Crunchy Frog) install directory.
alias cdbrat="cd /opt/brat-v1.3_Crunchy_Frog"
# Shortcut: run standalone.py with the Python 2 interpreter
# (presumably brat's standalone server - confirm against the brat install).
alias runbrat="python2 standalone.py"
# Enter the install directory and, only if that succeeded, start standalone.py.
# NOTE(review): bash expands aliases only in interactive shells unless
# `shopt -s expand_aliases` is set - confirm how these lines are meant to be run.
cdbrat && runbrat
# Create an empty .ann file next to every .txt file under medical/.
# Only the trailing ".txt" of each path is swapped for ".ann" (a ".txt" that
# appears earlier in the path is left alone), and every path is handed to
# touch as a single quoted argument, so names with spaces or quotes work.
find medical -name '*.txt' -exec sh -c 'for f in "$@"; do touch "${f%.txt}.ann"; done' sh {} +
# Sanitize n: every character that is not an ASCII letter, an ASCII digit,
# a character in the range U+4E00-U+9FA5, or one of < > , _ - is replaced
# with an underscore.
n = re.sub(u'[^a-zA-Z\u4e00-\u9fa5<>,0-9_-]', '_', n)
def mark_ann(enti_list,txt_list):
for txt in txt_list:
txt_name = txt.get_name # 获取txt的名字
result = []
count = 0
for enti in enti_list: # (enti_name,enti_type)
split_list = txt.split(enti)
pre = -1
enti_name = enti[0]
enti_type = enti[1]
len_word = len(enti_name)
for idx,words in enumerate(split_list):
if idx < len(split_list) - 1:
start = pre + len(words) + 1
end = start + len_word - 1
pre = end
count += 1
result.append(("T" + str(count),enti_type,str(start),str(end),enti_name))
with open("txt_name" + ".ann","w") as f:
for idx,t in enumerate(result):
f.write(" ".join(t))
if idx < len(result) -1:
f.write("\n")
/**
 * Prefix tree over pattern strings.
 *
 * Relies on a `Node` class defined elsewhere; this class reads and writes a
 * node's `children` map and its `pattern` and `endCount` fields.
 */
class Trie {
  constructor() {
    this.root = new Node("root");
  }

  /**
   * Adds `word` to the trie, creating any missing nodes along its path.
   * @param {string} word - pattern to store, walked one index at a time
   */
  insert(word) {
    let current = this.root;
    let pos = 0;
    while (pos < word.length) {
      const ch = word[pos];
      // Reuse the existing branch for this character, or grow a new one.
      current = current.children[ch] || (current.children[ch] = new Node(ch));
      pos += 1;
    }
    // Keep the full pattern on its terminal node so it can be collected later.
    current.pattern = word;
    // Counts how many times this exact pattern has been inserted.
    current.endCount++;
  }
}
/**
 * Loads every pattern into the trie by calling `trie.insert` once per entry,
 * in index order.
 * @param {{insert: Function}} trie - object exposing an `insert(pattern)` method
 * @param {ArrayLike<string>} patterns - patterns to add
 */
function createGoto(trie, patterns) {
  let index = 0;
  while (index < patterns.length) {
    trie.insert(patterns[index]);
    index += 1;
  }
}
/**
 * Assigns a `fail` link to every node below `ac.root`, visiting the trie
 * breadth-first so a parent's link is always set before its children's.
 *
 * Direct children of the root fail back to the root. A deeper child follows
 * its parent's fail chain until some node on it has a child under the same
 * key; that child becomes the fail target, and the root is used when the
 * chain runs out. The root's own `fail` is never written.
 *
 * @param {{root: object}} ac - automaton whose nodes carry `children` and `fail`
 */
function createFail(ac) {
  const root = ac.root;
  const pending = [root]; // the root sits on level 0
  while (pending.length) {
    const parent = pending.shift();
    if (!parent) {
      continue;
    }
    for (const key in parent.children) {
      const child = parent.children[key];
      // Level-1 nodes never consult a fail chain; deeper nodes start from
      // their parent's fail link, which lives on another branch.
      let fallback = parent === root ? null : parent.fail;
      while (fallback && !fallback.children[key]) {
        fallback = fallback.fail;
      }
      child.fail = fallback ? fallback.children[key] : root;
      pending.push(child);
    }
  }
}