Merge remote-tracking branch 'origin/dev/addPush'

master
Zovjsra 4 years ago
commit cd07e4f5f4

@ -67,7 +67,7 @@ def ptt_push():
@app.route('/ptt_push/init', methods=['POST'])
def pttPushInit():
author = next(pttPush.getMostFrequentAuthor())
author = 'a58461351'
pushes = pttPush.findAuthorPush(
author=[author], stopwords=pttPush.defaultStopWords)
result: dict = {

@ -31,6 +31,8 @@ with open('/home/vis/pttDatabase/PTTData/Gossiping/content/content.pck', 'rb') a
defaultStopWords = []
data = PTTData('Gossiping', '/home/vis/pttDatabase/PTTData')
sentence_length = 100
use_push_count = False
with open('resource/stopWords.txt', 'r', encoding='UTF-8') as file:
for word in file.readlines():
@ -75,7 +77,7 @@ def contentProcess(content, text):
result = []
for i in cutted:
result.append(i)
if (len(result) >= 50):
if (len(result) >= sentence_length):
sentenses.append(result.copy())
result = []
if (result != []):
@ -324,7 +326,7 @@ def getDefault(startDate, endDate):
'date': datetime.strptime(i['date'], '%Y%m%d%H%M%S').strftime("%a %b %d %H:%M:%S %Y"),
'part': i['content'][max(0, cut[0] - 20): min(len(i['content']), cut[1])].replace('\n', '')
}
result.append([len(result), seq, 1000 + i['pushes']])
result.append([len(result), seq, (1000 + i['pushes'])if use_push_count else 3000])
fileString = io.StringIO()
writer = csv.writer(fileString, delimiter='\t')

@ -67,4 +67,9 @@
所以
記者
媒體

@ -112,6 +112,24 @@ html {
transition-duration: 0.5s;
}
/* Panel that wraps the embedded PTT page iframe: fixed to the viewport,
   centred, and sized to 90% of its width and height. */
#pttPageWindowContent {
display: block;
position: fixed;
background-color: #FFF;
/* left/top 50% plus the -50% translate centres the panel on its own midpoint */
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
width: 90%;
height: 90%;
/* colour, width and style are set separately; together they give a 1px solid light-gray border */
border: lightgray;
border-width: 1px;
border-style: solid;
border-radius: 20px;
padding: 26px;
/* higher than the z-index of 90 used by the .info overlay */
z-index: 99;
box-shadow: 0px 5px 20px rgba(0, 0, 0, .3);
}
.deleteListElement {
position: absolute;
right: 0;
@ -141,6 +159,18 @@ li a {
}
/* Translucent white overlay layer: fixed to the top of the viewport and
   sized to 100% width and height, laid out as a flex container. */
.info {
background-color: rgba(255, 255, 255, 0.6);
/* plays the fadeIn animation once; "forwards" keeps its final keyframe state */
animation: fadeIn 0.2s;
animation-fill-mode: forwards;
position: fixed;
display: flex;
top: 0;
width: 100%;
height: 100%;
z-index: 90;
}
.info99 {
background-color: rgba(255, 255, 255, 0.6);
animation: fadeIn 0.2s;
animation-fill-mode: forwards;
@ -189,7 +219,7 @@ li a {
height: auto;
border-radius: 15px;
padding: 10px 15px;
z-index: 99;
z-index: 90;
align-content: center;
}

@ -1,6 +1,7 @@
var idfTable
var idfTableOrig
var idfBase = 1000
var mode = 0
$.ajax({
url: '/resource/idfTable.json',
async: false,

@ -76,6 +76,41 @@ function init() {
hideIdfEditor()
}
})
$('#pttPageWindow').click(function(e) {
if ($('#pttPageWindow').is(e.target)) {
hidePTTPage()
}
})
changeMode(0)
destroyCurrentGraph()
buildSentetree()
}
/**
 * Fill the search form with one of the predefined example queries and run it.
 *
 * Applies the template's date range, keyword and mode to the page, then
 * submits the search through sendRequest().
 *
 * @param {number} num - Zero-based index of the example to load (0-2).
 *     An index that matches no template is ignored.
 */
function loadTemplate(num) {
    // Kept local so the example list does not leak into the global scope.
    const templates = [{
            startDate: '2020-12-01',
            endDate: '2020-12-31',
            keyword: '',
            mode: 1
        },
        {
            startDate: '2020-01-01',
            endDate: '2020-03-01',
            keyword: '衛生紙',
            mode: 2
        },
        {
            startDate: '2020-01-11',
            endDate: '2020-01-12',
            keyword: '',
            mode: 2
        }
    ]
    const chosenTemp = templates[num]
    if (chosenTemp === undefined) {
        // Unknown example index: leave the form untouched instead of throwing.
        return
    }
    setDate(chosenTemp.startDate, chosenTemp.endDate)
    $('#keywordBox').val(chosenTemp.keyword)
    changeMode(chosenTemp.mode)
    sendRequest()
}
function clearStopWord() {
@ -107,6 +142,17 @@ function addStopWord() {
$('#newStopWord').val('')
}
/**
 * Select the active mode and highlight its button in #modeSelector.
 *
 * The button whose position matches the mode is greyed out (#aaa); the other
 * buttons are reset to black (#000). The chosen value is then stored in the
 * outer-scope `mode` variable.
 *
 * @param {number} _mode - Index of the mode to activate (0-3).
 */
function changeMode(_mode) {
    // Query the button set once instead of on every iteration.
    const buttons = $('#modeSelector button')
    // Block-scoped counter so the loop cannot clobber a global `i` shared
    // with other loops on the page.
    for (let i = 0; i < 4; i++) {
        // Loose equality kept so a numeric string such as "2" still matches.
        buttons.eq(i).css("color", i == _mode ? "#aaa" : "#000")
    }
    mode = _mode
}
function scrollIdfList() {
let targetWord = $('#idfTarget').val()
let wordList = $("#ieContainer").find("tr").slice(1).map((_, a) => { return $($(a).find("td")[0]).html() }).get()
@ -156,8 +202,12 @@ function showIdfEditor() {
.append($('<th>')
.attr('style', 'position: sticky; top: 0; background: white;')
.append('單詞'))
.append($('<th>'))
.append($('<th>').attr('class', 'w3-right-align')
.append($('<th>')
.attr('class', 'w3-center-align')
.attr('style', 'position: sticky; top: 0; background: white;')
.append('操作'))
.append($('<th>')
.attr('class', 'w3-right-align')
.attr('style', 'position: sticky; top: 0; background: white;')
.append('單詞頻率')
)
@ -245,6 +295,15 @@ function hideIdfEditor() {
$('#idfEditorLayer').addClass('hidden')
}
/**
 * Open the PTT page overlay on the given address.
 *
 * Points the iframe inside #pttPageWindowContent at `url`, then removes the
 * `hidden` class from #pttPageWindow.
 *
 * @param {string} url - Address to load into the iframe.
 */
function showPTTPage(url) {
    const frame = $('#pttPageWindowContent iframe')
    frame.attr('src', url)

    const overlay = $('#pttPageWindow')
    overlay.removeClass('hidden')
}
/**
 * Close the PTT page overlay by adding the `hidden` class to #pttPageWindow.
 */
function hidePTTPage() {
    const overlay = $('#pttPageWindow')
    overlay.addClass('hidden')
}
function updateIdfTable() {
let wordList = $("#ieContainer").find("tr").slice(1).map((_, a) => {
return [
@ -301,6 +360,23 @@ function closeEventListner() {
}
/**
 * Submit the current search form to the server and redraw the graph.
 *
 * POSTs the JSON string returned by getContent() to /addRequest. On success
 * the `Result` field of the response is handed to changeGraph(); on failure
 * the error is logged to the console.
 */
function sendRequest() {
    // content, startDate and endDate are assigned without a local declaration,
    // so they land in the enclosing scope.
    // NOTE(review): other code may read these values — confirm before making them local.
    content = getContent()
    startDate = $('#startDate').val()
    endDate = $('#endDate').val()
    console.log(content)
    $.ajax({
        type: 'POST',
        url: '/addRequest',
        data: content,
        contentType: 'application/json',
        success: function(data) {
            console.log(data)
            changeGraph(data.Result)
        },
        // Report failed requests instead of dropping them silently.
        error: function(jqXHR, textStatus, errorThrown) {
            console.error('/addRequest failed:', textStatus, errorThrown)
        }
    })
}
function getContent() {
content = JSON.stringify({
startDate: $('#startDate').val(),
endDate: $('#endDate').val(),
@ -316,19 +392,7 @@ function sendRequest() {
other: $('#other').is(':checked')
}
})
startDate = $('#startDate').val()
endDate = $('#endDate').val()
console.log(content)
$.ajax({
type: 'POST',
url: '/addRequest',
data: content,
contentType: 'application/json',
success: function(data) {
console.log(data)
changeGraph(data.Result)
}
})
return content
}
function changeGraph(data) {
@ -386,7 +450,8 @@ function buildSentetree() {
gapBetweenGraph: 10
});
console.log(tree)
tree.data(model.getRenderedGraphs(2))
let nGraph = globKeyword == "" ? 5 : 2
tree.data(model.getRenderedGraphs(nGraph))
.on('nodeClick', node => {
if ('mergedData' in node.data) {
seqList = node.data.mergedData.map((d) => {
@ -451,8 +516,9 @@ function buildSentetree() {
$('#titleListContainer').empty()
for (i of titleList) {
$('#titleListContainer').append(
$('<li>').attr('class', 'w3-panel').append(
$('<a>').attr('href', i.url).attr('target', '_blank').append(
$('<li>').attr('class', 'w3-panel')
.css('cursor', 'pointer').append(
$('<p>').attr('target', '_blank').append(
$('<h4>').html(i.title)
).append(
$('<span>').attr('style', 'margin: 0px 10px').html(i.author)
@ -461,7 +527,10 @@ function buildSentetree() {
).append(
$('<span>').attr('style', 'margin: 0px 10px').html('推文數:' + i.pushes)
)
)
).click(function() {
let indx = $(this).index()
showPTTPage((titleList[indx].url).replace('www.ptt.cc', 'www.pttweb.cc'))
})
)
}
})

@ -1,6 +1,7 @@
init()
var tsvPath = ''
var titlePath = ''
var tsvString
var defaultStartDate
var defaultEndDate
var totalPosts
@ -24,6 +25,7 @@ function init() {
console.log(wordPushList)
$('#idBox').val(data.Result.author)
buildSentetree(tsvString)
changeMode(2)
}
})
$(document).ready(function() {
@ -65,6 +67,31 @@ function init() {
})
}
/**
 * Fill the push-search form with one of the predefined examples and run it.
 *
 * Writes the template's user ID, article aid and keyword into their input
 * boxes, switches to the template's mode, then submits through sendRequest().
 *
 * @param {number} num - Zero-based index of the example to load (0-2).
 *     An index that matches no template is ignored.
 */
function loadTemplate(num) {
    // Kept local so the example list does not leak into the global scope.
    const templates = [{
        userId: '',
        aid: '1Vv4iFY6',
        keyword: '',
        mode: 0,
    }, {
        userId: '',
        aid: '1VyJ2vP_',
        keyword: '',
        mode: 0
    }, {
        userId: 'xetherz3',
        aid: '',
        keyword: '',
        mode: 0
    }]
    const chosenTemplate = templates[num]
    if (chosenTemplate === undefined) {
        // Unknown example index: leave the form untouched instead of throwing.
        return
    }
    $('#idBox').val(chosenTemplate.userId)
    $('#titleBox').val(chosenTemplate.aid)
    $('#keywordBox').val(chosenTemplate.keyword)
    // Use the template's own mode (0 for every current entry) rather than a
    // hard-coded value, so the `mode` field is actually honoured.
    changeMode(chosenTemplate.mode)
    sendRequest()
}
function clearStopWord() {
stopwords = []
$('#sweContainer').html('')
@ -111,6 +138,20 @@ function addStopWord() {
$('#newStopWord').val('')
}
/**
 * Select the active mode, highlight its button and rebuild the graph.
 *
 * The button in #modeSelector whose position matches the mode is greyed out
 * (#aaa); the others are reset to black (#000). The value is stored in the
 * outer-scope `mode` variable. The current graph is then destroyed, a fresh
 * #vis container is appended to #graph, and buildSentetree() is called with
 * the outer-scope `tsvString`.
 *
 * @param {number} _mode - Index of the mode to activate (0-3).
 */
function changeMode(_mode) {
    // Query the button set once instead of on every iteration.
    const buttons = $('#modeSelector button')
    // Block-scoped counter so the loop cannot clobber a global `i` shared
    // with other loops on the page.
    for (let i = 0; i < 4; i++) {
        // Loose equality kept so a numeric string such as "2" still matches.
        buttons.eq(i).css("color", i == _mode ? "#aaa" : "#000")
    }
    mode = _mode
    destroyCurrentGraph()
    d3.select('#graph').append('div').attr('id', 'vis')
    buildSentetree(tsvString)
}
function showIdfEditor() {
$(window).unbind('keydown')
$(window).keydown(function(event) {
@ -123,7 +164,10 @@ function showIdfEditor() {
.append($('<th>')
.attr('style', 'position: sticky; top: 0; background: white;')
.append('單詞'))
.append($('<th>'))
.append($('<th>')
.attr('class', 'w3-center-align')
.attr('style', 'position: sticky; top: 0; background: white;')
.append('操作'))
.append($('<th>').attr('class', 'w3-right-align')
.attr('style', 'position: sticky; top: 0; background: white;')
.append('單詞頻率')
@ -190,6 +234,17 @@ function showIdfEditor() {
$('#idfEditorLayer').removeClass('hidden')
}
/**
 * Close the IDF editor layer and restore the page-level Enter shortcut.
 *
 * Drops the keydown handlers currently bound on the window, installs one that
 * submits the search when key code 13 is pressed, then adds the `hidden`
 * class to #idfEditorLayer.
 */
function hideIdfEditor() {
    const submitOnEnter = function(event) {
        if (event.keyCode != 13) {
            return
        }
        event.preventDefault()
        sendRequest()
    }

    $(window).unbind('keydown')
    $(window).keydown(submitOnEnter)
    $('#idfEditorLayer').addClass('hidden')
}
function showStopwordEditor() {
console.log(stopwords)
$(window).unbind('keydown')
@ -296,7 +351,7 @@ function sendRequest() {
function changeGraph(data) {
console.log(data)
let tsvString = data.tsv
tsvString = data.tsv
let json = JSON.parse(data.json)
destroyCurrentGraph()
d3.select('#graph').append('div').attr('id', 'vis')

@ -19112,6 +19112,7 @@
var word = null;
var count = 0;
var len = seq.words.length;
var root = seq.newWord ? seq.newWord.word : undefined
var _loop = function _loop(s) {
var fdist = {};
@ -19133,6 +19134,8 @@
}
});
console.log(seq)
var maxw = null;
var maxc = 0;
@ -19143,20 +19146,65 @@
words.forEach(function(w) {
var value = fdist[w];
//if (value < maxSupport && (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase > maxc) {
//if (value < maxSupport && value > maxc && (stopwords.indexOf(itemset[w]) < 0 || isNotRoot)) {
if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) {
maxw = +w;
//maxc = (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase
maxc = value
var distRatio = 0.1
if (root) {
var dist = Math.abs(words.indexOf(w) - words.indexOf(root))
}
switch (mode) {
case 0:
if (!root) {
if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) {
maxw = +w
maxc = value
}
} else {
if (value < maxSupport && value > maxc && dist < 2 && stopwords.indexOf(itemset[w]) < 0 && dist == 1) {
maxw = +w
maxc = value
console.log(maxc)
}
}
break
case 1:
if (value < maxSupport && (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase > maxc) {
//if (value < maxSupport && value > maxc && (stopwords.indexOf(itemset[w]) < 0 || isNotRoot)) {
//if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) {
maxw = +w;
maxc = (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase
//maxc = value
}
break
case 3:
//if (value < maxSupport && (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase > maxc) {
if (value < maxSupport && value > maxc && (stopwords.indexOf(itemset[w]) < 0 || isNotRoot)) {
//if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) {
maxw = +w;
//maxc = (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase
maxc = value
}
break
case 2:
//if (value < maxSupport && value > maxc && (stopwords.indexOf(itemset[w]) < 0 || isNotRoot)) {
if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) {
maxw = +w;
//maxc = (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase
maxc = value
}
break
default:
break
}
});
console.log(itemset)
if (maxc > count) {
pos = s;
word = maxw;
count = maxc;
console.log({ 'word#': maxw, 'word': itemset[maxw], 'root': seq.newWord ? seq.newWord.entity : null, 'count': root ? count : "root" })
}
};
@ -19188,7 +19236,6 @@
}
}
}
console.log({ 's0': s0, 's1': s1 })
return { word: word, pos: pos, count: count, s0: s0, s1: s1 };
}
@ -19206,12 +19253,10 @@
seqs.push(rootSeq);
var leafSeqs = [];
console.log(JSON.parse(JSON.stringify(seqs)))
while (!seqs.empty() && expandCnt > 0) {
/* find the candidate sequence with largest support DB */
var s = seqs.pop();
console.log({ 's': s, 'seqs': seqs })
var graph = s.graph;
var s0 = s.r;
var s1 = s.l;
@ -19219,7 +19264,6 @@
if (!s0 && !s1) {
/* find the next frequent sequence */
var result = growSeq(s, terms, minSupport, maxSupport, itemset);
console.log(result)
s0 = result.s0;
s1 = result.s1;
var word = result.word,
@ -19237,6 +19281,7 @@
}
var newWord = {
id: graph.totalNodeCnt++,
word: word,
entity: itemset[word],
freq: count,
topEntries: s1.DBs.slice(0, 5),
@ -19306,7 +19351,6 @@
var str = words.map(function(w) {
return w.entity;
}).join(' ');
console.log(str);
}
var SentenTreeModel = function() {
@ -19336,7 +19380,6 @@
var size = tokenizedData.computeSize();
this.supportRange = [Math.max(size * minSupportRatio, minSupportCount), size * maxSupportRatio];
console.log(this.supportRange)
var _supportRange = _slicedToArray(this.supportRange, 2),
minSupport = _supportRange[0],
@ -19386,11 +19429,9 @@
key: 'getRenderedGraphs',
value: function getRenderedGraphs(limit) {
var graphs = arguments.length === 1 ? this.graphs.slice(0, limit) : this.graphs;
console.log("slice")
var renderedGraphs = graphs.map(function(g) {
return g.toRenderedGraph();
});
console.log("toRenderedGraph")
var globalFreqRange = [(0, _lodash.min)(renderedGraphs.map(function(g) {
return g.freqRange[0];
})), (0, _lodash.max)(renderedGraphs.map(function(g) {
@ -37869,15 +37910,12 @@
let counter = 1;
while (heap.size() > 0) {
console.log(`in while ${counter++}`)
var parent = heap.pop();
console.log(heap)
if (parent.merged) {
continue;
}
var groups = [];
console.log(parent.data.id)
if (parent.leftLinks.length > 1) {
var lNodes = parent.leftLinks.map(function(l) {
return l.source;
@ -37891,7 +37929,6 @@
});
groups = groups.concat(this.groupMergeableNodes(rNodes));
}
console.log(groups)
if (groups.length > 0) {
var newNodes = groups.map(function(group) {
@ -38119,7 +38156,6 @@
var RenderedGraph = function() {
function RenderedGraph(rawGraph) {
console.log(arguments)
var _ref = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {},
_ref$bundle = _ref.bundle,
bundle = _ref$bundle === undefined ? true : _ref$bundle,
@ -38152,10 +38188,8 @@
this.assignNodeIds(nodes);
console.log(bundle)
if (bundle) {
var bundled = new _GraphBundler2.default(nodes, links).bundle();
console.log(bundled)
this.nodes = bundled.nodes;
this.links = bundled.links;
this.assignNodeIds(this.nodes);
@ -43071,11 +43105,10 @@
var l3 = l * l * l;
var hs = 2 * -weight / (D2 * l3);
if (!isFinite(gs))
console.log(gs);
for (i = 0; i < this.k; ++i) {
this.g[i][u] += d[i] * gs;
Huu[i] -= this.H[i][u][v] = hs * (l3 + D * (d2[i] - sd2) + l * sd2);
}
for (i = 0; i < this.k; ++i) {
this.g[i][u] += d[i] * gs;
Huu[i] -= this.H[i][u][v] = hs * (l3 + D * (d2[i] - sd2) + l * sd2);
}
}
for (i = 0; i < this.k; ++i)
maxH = Math.max(maxH, this.H[i][u][u] = Huu[i]);

@ -41,7 +41,8 @@
<button class="general-button" type="button" id="editSWButton" style="margin:10px 0px;" onclick="showStopwordEditor()">編輯停用詞</button>
</div>
<div id='rawText' class=''>
<textarea id='rawTextBox' rows=25></textarea>
<textarea id='rawTextBox' rows=25 placeholder="輸入要視覺化的文字
換行為斷句"></textarea>
<button class='general-button' style='margin: 10px 0px' onclick="submit()">提交</button>
</div>
<div>

@ -15,6 +15,11 @@
<button class="w3-button w3-teal" type="button" onclick="location.href='/generalTxt'">泛用文字視覺化工具</button>
</div>
<div id='pttPageWindow' class='info99 hidden'>
<div id='pttPageWindowContent'>
<iframe src="http://ptt.cc" style='width:100%; height:100%' frameborder=0></iframe>
</div>
</div>
<div id="nodeTitle" class="nodeTitle hidden">
<div id="nodeTitleContent"></div>
</div>
@ -66,21 +71,24 @@
<div id="heading">
<h2>{{title}}</h2>
<p>SentenTree <a href="https://github.com/twitter/SentenTree">https://github.com/twitter/SentenTree</a></p>
<p id="comment">同時使用關鍵詞和詞性搜尋的時候,必須選擇所設關鍵詞本身的詞性,否則會搜尋不到結果。</p>
<p id="comment">點選圖上的單詞可以查看單詞的資訊,觀看原始文章,也會快速切換關鍵字。</p>
<p id="comment">若搜尋到的文章超過50篇圖表僅會顯示推文數最多的前50篇文章。</p>
<p id="comment">停用詞的處理改為將不直接忽略停用詞,但是停用詞不會被設為主要單詞,並且大小會比其他單詞更小。</p>
<p>可直接從下方範例搜尋條件中選擇</p>
<div>
<button class='w3-button w3-green' onclick='loadTemplate(0)'>範例1</button>
<button class='w3-button w3-green' onclick='loadTemplate(1)'>範例2</button>
<button class='w3-button w3-green' onclick='loadTemplate(2)'>範例3</button>
</div>
<div id="searchingTarget">
<form name="form" enctype="multipart/form-data">
<span>搜尋日期範圍 從</span>
<input id="startDate" name="startDate" type="date">
<span></span>
<input id="endDate" name="endDate" type="date">
<button class="general-button" type="button" id="resetDateButton" onclick="setDate(defaultStartDate, defaultEndDate)">預設日期</button>
<button class="general-button" type="button" id="resetDateButton" onclick="setDate(defaultStartDate, defaultEndDate)">預設日期</button><br>
<input id="keywordBox" type="text" name="message" placeholder="輸入關鍵詞">
<button class="general-button" type="button" id="sendButton" onclick="sendRequest()">搜尋關鍵字</button>
<button class="general-button" type="button" id="resetButton" onclick="document.getElementById('keywordBox').value=''">清除關鍵字</button>
<button class="general-button" type="button" id="editSWButton" onclick="showIdfEditor()">編輯停用詞</button>
<button class="general-button" type="button" id="editSWButton" onclick="showStopwordEditor()">編輯停用詞</button>
<button class="general-button" type="button" id="editIdeButton" onclick="showIdfEditor()">編輯詞頻</button>
</form>
</div>
<div id="advancedArea">
@ -95,6 +103,12 @@
<input type="checkbox" id="other" checked="checked">其他詞性
</form>
</div>
<div id='modeSelector'>
<button type="button" id='mode0' class='w3-button' onclick="changeMode(0);destroyCurrentGraph();buildSentetree()">相鄰詞模式</button>
<button type="button" id='mode1' class='w3-button ' onclick="changeMode(1);destroyCurrentGraph();buildSentetree()">Idf模式</button>
<button type="button" id='mode2' class='w3-button ' onclick="changeMode(2);destroyCurrentGraph();buildSentetree()">停用詞模式</button>
<button type="button" id='mode3' class='w3-button ' onclick="changeMode(3);destroyCurrentGraph();buildSentetree()">根單詞停用模式</button>
</div>
</div>
<div id="graphInfo"></div>
<div id="graph">

@ -67,6 +67,12 @@
<p>SentenTree <a href="https://github.com/twitter/SentenTree">https://github.com/twitter/SentenTree</a></p>
<p id="comment">同時使用關鍵詞和詞性搜尋的時候,必須選擇所設關鍵詞本身的詞性,否則會搜尋不到結果。</p>
<p id="comment">鄉民ID和文章aid之中必須至少設定一個搜尋條件。</p>
<p>可直接從下方範例搜尋條件中選擇</p>
<div>
<button class='w3-button w3-green' onclick='loadTemplate(0)'>範例1</button>
<button class='w3-button w3-green' onclick='loadTemplate(1)'>範例2</button>
<button class='w3-button w3-green' onclick='loadTemplate(2)'>範例3</button>
</div>
<div id="searchingTarget">
<form name="form" enctype="multipart/form-data">
<input id="idBox" class='searchBox' type="text" name="message" placeholder="輸入鄉民ID">
@ -75,6 +81,7 @@
<button class="general-button" type="button" id="sendButton" onclick="sendRequest()">搜尋關鍵字</button>
<button class="general-button" type="button" id="resetButton" onclick="document.getElementById('keywordBox').value=''">清除關鍵字</button>
<button class="general-button" type="button" id="editSWButton" onclick="showStopwordEditor()">編輯停用詞</button>
<button class="general-button" type="button" id="editIdeButton" onclick="showIdfEditor()">編輯詞頻</button>
</form>
</div>
<div id="advancedArea">
@ -89,6 +96,12 @@
<input type="checkbox" id="other" checked="checked">其他詞性
</form>
</div>
<div id='modeSelector'>
<button type="button" id='mode0' class='w3-button' onclick="changeMode(0)">相鄰詞模式</button>
<button type="button" id='mode1' class='w3-button ' onclick="changeMode(1)">Idf模式</button>
<button type="button" id='mode2' class='w3-button ' onclick="changeMode(2)">停用詞模式</button>
<button type="button" id='mode3' class='w3-button ' onclick="changeMode(3)">根單詞停用模式</button>
</div>
</div>
<div id="graphInfo"></div>
<div id="graph">

Loading…
Cancel
Save