Compare commits

..

2 Commits

Author SHA1 Message Date
Zovjsra c2b9dc1b42 change port
4 years ago
Zovjsra c461ca1a9d change port
4 years ago

@ -1,10 +0,0 @@
# Container image that runs app.py (the Flask entry point) with the
# system Python interpreter.
FROM python:3.8.16-bullseye

# Install the dependencies before copying the sources so this layer is reused
# when only application code changes; --no-cache-dir keeps pip's download
# cache out of the final image.
RUN pip install --no-cache-dir flask flask-compress jieba numpy progressbar2 nltk langdetect

WORKDIR /app

# COPY is the recommended instruction for plain local files; ADD's extra
# behaviors (archive extraction, remote URLs) are not needed here.
COPY . /app

# The application writes its generated files under data/. -p keeps the build
# from failing when the build context already contains a data/ directory.
RUN mkdir -p data

ENTRYPOINT [ "/usr/local/bin/python", "app.py" ]

@ -1,3 +1 @@
# ptt-sententree
## Reference
https://github.com/twitter/SentenTree

@ -6,8 +6,8 @@ from pprint import pprint
import threading
import random
import string
# import dataHandlerPTT as ptt
# import dataHandlerPTTPush as pttPush
import dataHandlerPTT as ptt
import dataHandlerPTTPush as pttPush
import generalText as gen
import json
@ -33,9 +33,9 @@ def eventStream(eventQueue):
yield "event:{event}\n{data}\n\n".format(event=eventNode['event'], data=data)
@app.route('/img/<path:path>')
@app.route('/data/<path:path>')
def send_data(path):
return send_from_directory('resource/img', path)
return send_from_directory('data', path)
@app.route('/generalTxt')
@ -57,7 +57,7 @@ def generalText_addText():
@app.route('/')
def index():
return redirect('/generalTxt')
return redirect('/ptt')
@app.route('/ptt_push')
@ -151,13 +151,10 @@ def send_resource(path):
return send_from_directory('resource', path)
@app.route('/generaltxt/help')
def generaltxt_help():
return render_template('generaltxt_help.html', title="使用說明")
@app.route("/dcard_dev")
def dcard_dev():
return render_template('dcard.html', title='DCard Sentntree 測試版')
@app.route('/data/<path:path>')
def get_data(path):
return send_from_directory('data', path)
if __name__ == "__main__":
app.run(debug=True, port=4980, host='0.0.0.0', threaded=False)
app.run(debug=True, port=4998, host='0.0.0.0', threaded=True)

@ -15,7 +15,7 @@ from numpy import prod
from jieba import posseg
from progressbar import ProgressBar
from datetime import datetime
#from PTTData import PTTData
from PTTData import PTTData
defaultDate = {
@ -29,7 +29,7 @@ with open('/home/vis/pttDatabase/PTTData/Gossiping/content/content.pck', 'rb') a
f.close()
defaultStopWords = []
#data = PTTData('Gossiping', '/home/vis/pttDatabase/PTTData')
data = PTTData('Gossiping', '/home/vis/pttDatabase/PTTData')
sentence_length = 100
use_push_count = False

@ -2,7 +2,6 @@ import jieba
import csv
import nltk
import re
import json
from jieba import posseg
from nltk import tokenize
from langdetect import detect
@ -28,41 +27,42 @@ def filterPOS(text):
def processText(randId, text, stopwords):
if (text == ''):
if(text == ''):
return ''
lang = detect(text)
sentenses = []
sentenses_raw = []
print(lang)
if (lang == 'zh-cn' or lang == 'zh-tw' or lang == 'ko'):
splitted = re.split('。|[\n]+', text)
print(splitted)
cutted = []
for spl in splitted:
cutted.append(filterPOS(spl))
for i in splitted:
cutted.append(filterPOS(i))
print(cutted)
for spl, raw in zip(cutted, splitted):
sentenses.append(' '.join(spl))
sentenses_raw.append(raw)
for i in cutted:
result = []
for j in i:
if (j in stopwords):
continue
result.append(j)
if (len(result) >= 20):
sentenses.append(' '.join(result.copy()))
result = []
if (result != []):
sentenses.append(' '.join(result))
else:
sentenses = []
sentenses_raw = []
for sentence_raw in tokenize.sent_tokenize(text):
words = sentence_raw.lower().split(' ')
for sentence in tokenize.sent_tokenize(text):
words = sentence.lower().split(' ')
print([''.join([a for a in w1 if a.isalpha()]) for w1 in words])
sentence = ' '.join([w for w in [''.join([a for a in w1 if a.isalpha()]) for w1 in words] if w not in [sw.lower() for sw in stopwords]])
sentenses.append(sentence)
sentenses_raw.append(sentence_raw)
result = []
for index, raw_pair in enumerate(zip(sentenses, sentenses_raw)):
sentence, sentence_raw = raw_pair
result.append({
'id': index,
'text': sentence,
'count': 10,
'rawtxt': sentence_raw,
})
with open('data/' + randId + '.json', 'w', newline='', encoding="utf-8") as fp:
json.dump(result, fp, ensure_ascii=False, indent=4)
return ('data/' + randId + '.json')
result.append(['id', 'text', 'count'])
for index, sentence in enumerate(sentenses):
result.append([index, sentence, 1000])
with open('data/' + randId + '.tsv', 'w', newline='', encoding="utf-8") as f:
writer = csv.writer(f, delimiter='\t')
writer.writerows(result)
f.close()
return ('data/' + randId + '.tsv')

Binary file not shown.

Before

Width:  |  Height:  |  Size: 93 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 250 KiB

@ -186,7 +186,6 @@ li a {
from {
opacity: 0;
}
to {
opacity: 1;
}
@ -203,7 +202,6 @@ li a {
from {
opacity: 1;
}
to {
opacity: 0;
}
@ -262,7 +260,7 @@ li a {
#vis {
display: inline-block;
background-color: transparent;
background-color: aliceblue;
position: relative;
border-radius: 30px;
resize: both;

@ -1,33 +1,6 @@
var tsvPath
var stopwords = []
const init = () => {
$(window).on('mousemove', (e) => {
$('#nodeTitle').css({
left: e.pageX,
top: e.pageY
})
})
$('#minRatioLabel').on('mouseenter', () => {
$('#nodeTitle').removeClass('hidden')
$('#nodeTitleContent').html('兩個相鄰單詞之間出現頻率比值的最小值,小於該值不會被演算法選擇')
}).on('mouseleave', () => {
$('#nodeTitle').toggleClass('hidden')
})
$('#maxRatioLabel').on('mouseenter', () => {
$('#nodeTitle').removeClass('hidden')
$('#nodeTitleContent').html('兩個相鄰單詞之間出現頻率比值的最大值,大於該值不會被演算法選擇')
}).on('mouseleave', () => {
$('#nodeTitle').toggleClass('hidden')
})
$('#wordcount').on('mouseenter', () => {
$('#nodeTitle').removeClass('hidden')
$('#nodeTitleContent').html('僅計算中文字的字數')
}).on('mouseleave', () => {
$('#nodeTitle').toggleClass('hidden')
})
}
function clearStopWord() {
stopwords = []
$('#sweContainer').html('')
@ -42,7 +15,7 @@ function addStopWord() {
} else {
stopwords.push(newsw)
$('#sweContainer').append($('<li>').attr('class', 'w3-display-container').append($('<span>').append(newsw)).append($('<span>').attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right').click(function (e) {
$('#sweContainer').append($('<li>').attr('class', 'w3-display-container').append($('<span>').append(newsw)).append($('<span>').attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right').click(function(e) {
var index = $(this).parent().index()
console.log(stopwords[index])
stopwords.splice(index, 1)
@ -60,14 +33,14 @@ function addStopWord() {
function showStopwordEditor() {
console.log(stopwords)
$(window).unbind('keydown')
$(window).keydown(function (event) {
$(window).keydown(function(event) {
if (event.keyCode == 13) {
addStopWord()
}
})
$('#sweContainer').empty()
for (word of stopwords) {
$('#sweContainer').append($('<li>').attr('class', 'w3-display-container').append($('<span>').append(word)).append($('<span>').attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right').click(function (e) {
$('#sweContainer').append($('<li>').attr('class', 'w3-display-container').append($('<span>').append(word)).append($('<span>').attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right').click(function(e) {
var index = $(this).parent().index()
console.log(stopwords[index])
stopwords.splice(index, 1)
@ -80,7 +53,7 @@ function showStopwordEditor() {
function hideStopWordEditor() {
$(window).unbind('keydown')
$(window).keydown(function (event) {
$(window).keydown(function(event) {
if (event.keyCode == 13) {
event.preventDefault()
sendRequest()
@ -101,7 +74,7 @@ function submit() {
stopwords: stopwords
}),
contentType: 'application/json',
success: function (data) {
success: function(data) {
tsvPath = data.Result.path
destroyCurrentGraph()
d3.select('#graph').append('div').attr('id', 'vis')
@ -113,42 +86,23 @@ function submit() {
function buildSentetree() {
console.log("Build.")
let model;
let tree;
let data;
const graph = d3.json(tsvPath, buildTree);
var model;
var tree;
var data;
const graph = d3.tsv(tsvPath, buildTree);
function buildTree(error, rawdata) {
console.log(rawdata)
const data = rawdata.map(d => Object.assign({}, d, { count: +d.count }));
console.log({ data })
let minRatio = $('#minRatio').val()
let maxRatio = $('#maxRatio').val()
console.log({ minRatio, maxRatio })
model = new SentenTree.SentenTreeBuilder()
.tokenize(SentenTree.tokenizer.tokenizeBySpace)
.transformToken(token => (/score(d|s)?/.test(token) ? 'score' : token))
.buildModel(data, { maxSupportRatio: maxRatio, minSupportRatio: minRatio });
.buildModel(data, { maxSupportRatio: 1 });
tree = new SentenTree.SentenTreeVis('#vis', {
fontSize: [15, 40],
gapBetweenGraph: 10
});
tree.data(model.getRenderedGraphs(2))
.on('nodeMouseenter', (node) => {
console.log(node)
$('#nodeTitle').removeClass('hidden')
$('#nodeTitleContent').html('<ul>' + node.data.topEntries.map((n) => "<li>" + data[n.id].rawtxt + "</li>").join('') + "</ul>")
})
.on('nodeMouseleave', () => {
$('#nodeTitle').addClass('hidden')
})
.on('linkMouseenter', (node) => {
$('#nodeTitle').removeClass('hidden')
$('#nodeTitleContent').html('出現次數:' + (node.freq / 10))
}).on('linkMouseleave', () => {
$('#nodeTitle').addClass('hidden')
})
new ResizeSensor(jQuery('#d3kitRoot'), function () {
new ResizeSensor(jQuery('#d3kitRoot'), function() {
var scale, origin;
scale = Math.min(2, ($('#graph').outerWidth()) / ($('#d3kitRoot').outerWidth() + 60))
@ -171,13 +125,4 @@ function switchMessageBox() {
} else {
$('#toggleTextBox').html('隱藏文字輸入區')
}
}
function countWords() {
text = $("#rawTextBox").val()
let wordCount = text.split(new RegExp("[\u4e00-\u9fa5]")).length - 1
console.log(wordCount)
$("#wordcount").html('字數:' + wordCount)
}
init()
}

@ -17,7 +17,7 @@ function init() {
type: 'POST',
url: '/init',
dataType: 'json',
success: function (data) {
success: function(data) {
console.log(data)
setDate(data.Result.startDate, data.Result.endDate)
document.getElementById('keywordBox').value = data.Result.keyword
@ -39,44 +39,44 @@ function init() {
buildSentetree()
}
})
$(document).ready(function () {
$(window).keydown(function (event) {
$(document).ready(function() {
$(window).keydown(function(event) {
if (event.keyCode == 13) {
event.preventDefault()
sendRequest()
}
});
});
$(window).on('mousemove', function (e) {
$(window).on('mousemove', function(e) {
$('#nodeTitle').css({
left: e.pageX,
top: e.pageY
})
})
$('#titleListContainer').hover(
function () { // Run on hover/mouseenter
function() { // Run on hover/mouseenter
$(this).css('overflow', 'auto')
},
function () { // Run on mouseleave
function() { // Run on mouseleave
$(this).css('overflow', 'hidden')
}
)
$('#titleListLayer').click(function (e) {
$('#titleListLayer').click(function(e) {
if ($('#titleListLayer').is(e.target)) {
hideTitles()
}
})
$('#stopWordEditorLayer').click(function (e) {
$('#stopWordEditorLayer').click(function(e) {
if ($('#stopWordEditorLayer').is(e.target)) {
hideStopWordEditor()
}
})
$('#idfEditorLayer').click(function (e) {
$('#idfEditorLayer').click(function(e) {
if ($('#idfEditorLayer').is(e.target)) {
hideIdfEditor()
}
})
$('#pttPageWindow').click(function (e) {
$('#pttPageWindow').click(function(e) {
if ($('#pttPageWindow').is(e.target)) {
hidePTTPage()
}
@ -88,23 +88,23 @@ function init() {
function loadTemplate(num) {
templates = [{
startDate: '2020-12-01',
endDate: '2020-12-31',
keyword: '',
mode: 1
},
{
startDate: '2020-01-01',
endDate: '2020-03-01',
keyword: '衛生紙',
mode: 2
},
{
startDate: '2020-01-11',
endDate: '2020-01-12',
keyword: '',
mode: 2
}
startDate: '2020-12-01',
endDate: '2020-12-31',
keyword: '',
mode: 1
},
{
startDate: '2020-01-01',
endDate: '2020-03-01',
keyword: '衛生紙',
mode: 2
},
{
startDate: '2020-01-11',
endDate: '2020-01-12',
keyword: '',
mode: 2
}
]
chosenTemp = templates[num]
setDate(chosenTemp.startDate, chosenTemp.endDate)
@ -127,7 +127,7 @@ function addStopWord() {
} else {
stopwords.push(newsw)
$('#sweContainer').append($('<li>').attr('class', 'w3-display-container').append($('<span>').append(newsw)).append($('<span>').attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right').click(function (e) {
$('#sweContainer').append($('<li>').attr('class', 'w3-display-container').append($('<span>').append(newsw)).append($('<span>').attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right').click(function(e) {
var index = $(this).parent().index()
console.log(stopwords[index])
stopwords.splice(index, 1)
@ -172,14 +172,14 @@ function scrollIdfList() {
function showStopwordEditor() {
$(window).unbind('keydown')
$(window).keydown(function (event) {
$(window).keydown(function(event) {
if (event.keyCode == 13) {
addStopWord()
}
})
$('#sweContainer').empty()
for (word of stopwords) {
$('#sweContainer').append($('<li>').append($('<span>').append(word)).append($('<span>').attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right').click(function (e) {
$('#sweContainer').append($('<li>').append($('<span>').append(word)).append($('<span>').attr('class', 'w3-button w3-hover-red w3-transparent w3-display-right').click(function(e) {
var index = $(this).parent().index()
console.log(stopwords[index])
stopwords.splice(index, 1)
@ -192,27 +192,27 @@ function showStopwordEditor() {
function showIdfEditor() {
$(window).unbind('keydown')
$(window).keydown(function (event) {
$(window).keydown(function(event) {
if (event.keyCode == 13) {
scrollIdfList()
}
})
$('#ieContainer').empty().append(
$('<thead>').append($('<tr>')
.append($('<th>')
.attr('style', 'position: sticky; top: 0; background: white;')
.append('單詞'))
.append($('<th>')
.attr('class', 'w3-center-align')
.attr('style', 'position: sticky; top: 0; background: white;')
.append('操作'))
.append($('<th>')
.attr('class', 'w3-right-align')
.attr('style', 'position: sticky; top: 0; background: white;')
.append('單詞頻率')
$('<thead>').append($('<tr>')
.append($('<th>')
.attr('style', 'position: sticky; top: 0; background: white;')
.append('單詞'))
.append($('<th>')
.attr('class', 'w3-center-align')
.attr('style', 'position: sticky; top: 0; background: white;')
.append('操作'))
.append($('<th>')
.attr('class', 'w3-right-align')
.attr('style', 'position: sticky; top: 0; background: white;')
.append('單詞頻率')
)
)
)
)
.append($('<tbody>'))
for (word of Object.entries(idfTable).sort((a, b) => { return (b[1] - a[1]) }).map((a) => { return a[0] }).slice(0, 1000)) {
$('#ieContainer').find('tbody')
@ -226,7 +226,7 @@ function showIdfEditor() {
.append($('<button>')
.attr('class', 'general-button')
.html('設為最小')
.click(function () {
.click(function() {
$(this)
.parent()
.parent()
@ -236,7 +236,7 @@ function showIdfEditor() {
.append($('<button>')
.attr('class', 'general-button')
.html('設為最大')
.click(function () {
.click(function() {
$(this)
.parent()
.parent()
@ -248,7 +248,7 @@ function showIdfEditor() {
.append($('<button>')
.attr('class', 'general-button')
.html('重設')
.click(function () {
.click(function() {
var _word = $($(this)
.parent()
.parent()
@ -275,7 +275,7 @@ function showIdfEditor() {
function hideStopWordEditor() {
$(window).unbind('keydown')
$(window).keydown(function (event) {
$(window).keydown(function(event) {
if (event.keyCode == 13) {
event.preventDefault()
sendRequest()
@ -286,7 +286,7 @@ function hideStopWordEditor() {
function hideIdfEditor() {
$(window).unbind('keydown')
$(window).keydown(function (event) {
$(window).keydown(function(event) {
if (event.keyCode == 13) {
event.preventDefault()
sendRequest()
@ -369,7 +369,7 @@ function sendRequest() {
url: '/addRequest',
data: content,
contentType: 'application/json',
success: function (data) {
success: function(data) {
console.log(data)
changeGraph(data.Result)
}
@ -460,16 +460,16 @@ function buildSentetree() {
})
})
} else {
seqList = node.data.seq.DBs.map(function (n) {
seqList = node.data.seq.DBs.map(function(n) {
return n.rawText
})
}
titleList = []
for (s of seqList) {
titleTemp = wordTitleList[s]
if ((titleList.map(function (n) {
return n.title
})).indexOf(titleTemp.title) == -1) {
if ((titleList.map(function(n) {
return n.title
})).indexOf(titleTemp.title) == -1) {
titleList.push(titleTemp)
}
}
@ -508,7 +508,7 @@ function buildSentetree() {
globKeyword: globKeyword
}),
contentType: 'application/json',
success: function (data) {
success: function(data) {
console.log(data)
$('#titleListKeywordInfo').html('單詞出現次數:' + data.Result.wordCount + ', 單詞出現的文章數:' + data.Result.postCount + ', 單詞頻率:' + (data.Result.postCount * 100 / totalPosts).toFixed(2) + '%')
}
@ -517,26 +517,26 @@ function buildSentetree() {
for (i of titleList) {
$('#titleListContainer').append(
$('<li>').attr('class', 'w3-panel')
.css('cursor', 'pointer').append(
$('<p>').attr('target', '_blank').append(
$('<h4>').html(i.title)
).append(
$('<span>').attr('style', 'margin: 0px 10px').html(i.author)
).append(
$('<span>').attr('style', 'margin: 0px 10px').html(i.date)
).append(
$('<span>').attr('style', 'margin: 0px 10px').html('推文數:' + i.pushes)
)
).click(function () {
let indx = $(this).index()
showPTTPage((titleList[indx].url).replace('www.ptt.cc', 'www.pttweb.cc'))
})
.css('cursor', 'pointer').append(
$('<p>').attr('target', '_blank').append(
$('<h4>').html(i.title)
).append(
$('<span>').attr('style', 'margin: 0px 10px').html(i.author)
).append(
$('<span>').attr('style', 'margin: 0px 10px').html(i.date)
).append(
$('<span>').attr('style', 'margin: 0px 10px').html('推文數:' + i.pushes)
)
).click(function() {
let indx = $(this).index()
showPTTPage((titleList[indx].url).replace('www.ptt.cc', 'www.pttweb.cc'))
})
)
}
})
.on('nodeMouseenter', node => {
console.log(node)
titles = node.data.topEntries.map(function (x) {
titles = node.data.topEntries.map(function(x) {
return wordTitleList[x.rawText]
})
console.log(titles)
@ -564,7 +564,7 @@ function buildSentetree() {
}).on('linkMouseenter', link => {
console.log(link)
})
new ResizeSensor(jQuery('#d3kitRoot'), function () {
new ResizeSensor(jQuery('#d3kitRoot'), function() {
var scale, origin;
scale = Math.min(2, ($('#graph').outerWidth()) / ($('#d3kitRoot').outerWidth() + 60))

@ -9,58 +9,44 @@
</head>
<body>
<div id="nodeTitle" class="nodeTitle hidden">
<div id="nodeTitleContent">test</div>
</div>
<div id="stopWordEditorLayer" class="info hidden">
<div id="stopWordEditor">
<h4 id="sweTitle" style="margin:10px; display: inline;">編輯停用詞</h4>
<ul id="sweContainer" class="w3-ul w3-hoverable" style="margin-bottom: 10px;"></ul>
<div>
<input class="w3-input w3-border" style="width: 85%; display: inline;" type="text" id="newStopWord"
placeholder="新增停用詞(以空白隔開)">
<button class="general-button w3-right" type="button" id="confirm"
style="background-color: #379; margin-left: 8px;" onclick="addStopWord()">新增</button>
<input class="w3-input w3-border" style="width: 85%; display: inline;" type="text" id="newStopWord" placeholder="新增停用詞(以空白隔開)">
<button class="general-button w3-right" type="button" id="confirm" style="background-color: #379; margin-left: 8px;" onclick="addStopWord()">新增</button>
</div>
<div id="sweButtons" style="margin: 20px 0px;">
<button class="general-button" type="button" id="confirm"
style="background-color: #379; margin: 0px 10px"
onclick="hideStopWordEditor(); submit()">確認</button>
<button class="general-button" type="button" id="confirm"
style="background-color: #379; margin: 0px 10px" onclick="downloadStopWord()">匯出停用詞</button>
<button class="general-button" type="button" id="confirm"
style="background-color: #379; margin: 0px 10px" onclick="clearStopWord()">全部清除</button>
<button class="general-button w3-right" type="button" id="confirm"
style="background-color: #379; margin: 0px 20px" onclick="hideStopWordEditor()">返回</button>
<button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 10px" onclick="hideStopWordEditor(); submit()">確認</button>
<button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 10px" onclick="downloadStopWord()">匯出停用詞</button>
<button class="general-button" type="button" id="confirm" style="background-color: #379; margin: 0px 10px" onclick="clearStopWord()">全部清除</button>
<button class="general-button w3-right" type="button" id="confirm" style="background-color: #379; margin: 0px 20px" onclick="hideStopWordEditor()">返回</button>
</div>
</div>
</div>
<div class='w3-bar w3-teal'>
<button class="w3-button w3-teal" type="button" onclick="location.href='/generalTxt'"
style="color: darkseagreen;">泛用文字視覺化工具</button>
<button class="w3-button w3-teal" type="button" onclick="location.href='/ptt'">PTT Sententree</button>
<button class="w3-button" type="button" onclick="location.href='/ptt_push'">推文Sententree</button>
<button class="w3-button w3-teal" type="button" onclick="location.href='/generalTxt'" style="color: darkseagreen;">泛用文字視覺化工具</button>
</div>
<div id='heading'>
<h2>泛用文字視覺化工具</h2>
<p>SentenTree <a href="https://github.com/twitter/SentenTree">https://github.com/twitter/SentenTree</a></p>
<p id="comment">點此查看<a href="/generaltxt/help">使用說明</a></p>
<p id='comment'>這是泛用.txt檔視覺化工具能夠簡單處理文字檔的視覺化。</p>
<p id='comment'>支援的語言:繁體中文、英文以及所有使用空格分詞的語言。</p>
<p id='comment'>使用繁體中文Jieba斷詞器不保證簡體中文能夠正常使用。</p>
</div>
<div style="margin:10px;">
<button class="general-button" type="button" id="editSWButton" style="margin:10px 0px;"
onclick="showStopwordEditor()">編輯停用詞</button>
<label id="minRatioLabel">Min Ratio</label>
<input type="number" step="0.0001" id="minRatio" value="0.001" min="0.0001" max="1">
<label id="maxRatioLabel">Max Ratio</label>
<input type="number" step="0.0001" id="maxRatio" value="1" min="0.0001" max="1">
<button class="general-button" type="button" id="editSWButton" style="margin:10px 0px;" onclick="showStopwordEditor()">編輯停用詞</button>
</div>
<div id='rawText' class=''>
<div id="wordcount">字數:0</div>
<textarea id='rawTextBox' rows=25 placeholder="輸入要視覺化的文字
換行為斷句" onchange="countWords()"></textarea>
換行為斷句"></textarea>
<button class='general-button' style='margin: 10px 0px' onclick="submit()">提交</button>
</div>
<div>
<button id='toggleTextBox' class='general-button' style='margin: 0px 10px'
onclick="switchMessageBox()">隱藏文字視窗</button>
<button id='toggleTextBox' class='general-button' style='margin: 0px 10px' onclick="switchMessageBox()">隱藏文字視窗</button>
<div id='graph' class='hidden'>
<div id='vis'></div>
</div>

@ -1,46 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link href="/static/css/w3.css" type="text/css" rel="stylesheet">
<link href="/static/css/main.css" type="text/css" rel="stylesheet">
<title>使用說明</title>
</head>
<body>
<h1>使用說明</h1>
<h2>基本說明</h2>
<p>泛用文字視覺化工具能夠簡單處理文字檔的視覺化。<br>
支援的語言:繁體中文、英文以及所有使用空格分詞的語言。<br>
使用繁體中文Jieba斷詞器不保證簡體中文能夠正常使用。</p>
<h2>參數</h2>
<p>此工具提供 <span style="color: red;">minRatio</span>、<span style="color: red;">maxRatio</span> 兩個參數的設定<br>
兩個參數代表相鄰兩個單詞(有被連接的單詞)之間的最大或最小比值<br>
例如:maxRatio 為 0.8 時,代表兩個相鄰的單詞出現頻率的比值必須小於 0.8,否則單詞就不會被演算法選中。
</p>
<h2>輸入資料前處理</h2>
<p>本工具會將輸入資料做預先處理。以中文語料為例,處理流程大致如下:</p>
<p>1. 斷句:使用中文的全形句號(。)及換行進行斷句</p>
<p>2. 斷詞並標記詞性:使用 Jieba 將每個句子分別斷詞,並標註其詞性</p>
<p>3. 過濾詞性:將英文及數字過濾,以免產生過多雜訊</p>
<h2>停用詞</h2>
<p>使用者可以編輯停用詞,被設定為停用詞的單詞,將不會被選擇到 sententree 中。</p>
<p>在輸入停用詞時,可以一次輸入多個停用詞,並使用空格(半形)分開。</p>
<h2>Sententree 圖形</h2>
<p>輸入一份文件預設會產生2個 sententree 圖形,圖 1 為一個 sententree 的圖形</p>
<p>每個圖形中間最大的單詞為<span style="color:blue;">根單詞</span></p>
<p>其中第二個圖形中不會包含第一個圖形的根單詞</p>
<p>單詞之間的連線代表兩個單詞有在同一個句子中出現過</p>
<p>灰色連線代表演算法在搜尋時,兩個單詞屬於同一個階層(出現在相同的句子中)</p>
<p>橘色連線代表兩個單詞屬於不同階層</p>
<p>連線的粗細代表兩個單詞同時出現的比例</p>
<p>將滑鼠移到單詞上,能夠看到包含該單詞的完整句子(最多顯示 5 筆),如圖 2</p>
<img style="width: 100%;" src="/img/general_txt_help_g01.png">
<span>圖 1:Sententree 圖形</span>
<img style="width: 100%;" src="/img/general_txt_help_g02.png">
<span>圖 2:完整句子顯示</span>
</body>
</html>
Loading…
Cancel
Save