diff --git a/app.py b/app.py index 3ade04a..af48a53 100755 --- a/app.py +++ b/app.py @@ -67,7 +67,7 @@ def ptt_push(): @app.route('/ptt_push/init', methods=['POST']) def pttPushInit(): - author = next(pttPush.getMostFrequentAuthor()) + author = 'yuetsu' pushes = pttPush.findAuthorPush( author=[author], stopwords=pttPush.defaultStopWords) result: dict = { diff --git a/static/js/loadIdfTable.js b/static/js/loadIdfTable.js index 6783e11..aed7f77 100644 --- a/static/js/loadIdfTable.js +++ b/static/js/loadIdfTable.js @@ -1,6 +1,7 @@ var idfTable var idfTableOrig var idfBase = 1000 +var mode = 0 $.ajax({ url: '/resource/idfTable.json', async: false, diff --git a/static/js/ptt.js b/static/js/ptt.js index 7e34125..3379fad 100755 --- a/static/js/ptt.js +++ b/static/js/ptt.js @@ -76,6 +76,7 @@ function init() { hideIdfEditor() } }) + changeMode(0) } function clearStopWord() { @@ -107,6 +108,19 @@ function addStopWord() { $('#newStopWord').val('') } +function changeMode(_mode) { + for (i = 0; i < 4; i++) { + if (i == _mode) { + $('#modeSelector button').eq(i).css("color", "#aaa") + } else { + $('#modeSelector button').eq(i).css("color", "#000") + } + } + mode = _mode + destroyCurrentGraph() + buildSentetree() +} + function scrollIdfList() { let targetWord = $('#idfTarget').val() let wordList = $("#ieContainer").find("tr").slice(1).map((_, a) => { return $($(a).find("td")[0]).html() }).get() @@ -386,7 +400,8 @@ function buildSentetree() { gapBetweenGraph: 10 }); console.log(tree) - tree.data(model.getRenderedGraphs(2)) + let nGraph = globKeyword == "" ? 5 : 2 + tree.data(model.getRenderedGraphs(nGraph)) .on('nodeClick', node => { if ('mergedData' in node.data) { seqList = node.data.mergedData.map((d) => { diff --git a/static/js/pttPush.js b/static/js/pttPush.js index 65c4b72..27ec55e 100644 --- a/static/js/pttPush.js +++ b/static/js/pttPush.js @@ -1,6 +1,7 @@ init() var tsvPath = '' var titlePath = '' +var tsvString var defaultStartDate var defaultEndDate var totalPosts @@ -111,6 +112,20 @@ function addStopWord() { $('#newStopWord').val('') } +function changeMode(_mode) { + for (i = 0; i < 4; i++) { + if (i == _mode) { + $('#modeSelector button').eq(i).css("color", "#aaa") + } else { + $('#modeSelector button').eq(i).css("color", "#000") + } + } + mode = _mode + destroyCurrentGraph() + d3.select('#graph').append('div').attr('id', 'vis') + buildSentetree(tsvString) +} + function showIdfEditor() { $(window).unbind('keydown') $(window).keydown(function(event) { @@ -190,6 +205,17 @@ function showIdfEditor() { $('#idfEditorLayer').removeClass('hidden') } +function hideIdfEditor() { + $(window).unbind('keydown') + $(window).keydown(function(event) { + if (event.keyCode == 13) { + event.preventDefault() + sendRequest() + } + }) + $('#idfEditorLayer').addClass('hidden') +} + function showStopwordEditor() { console.log(stopwords) $(window).unbind('keydown') @@ -296,7 +322,7 @@ function sendRequest() { function changeGraph(data) { console.log(data) - let tsvString = data.tsv + tsvString = data.tsv let json = JSON.parse(data.json) destroyCurrentGraph() d3.select('#graph').append('div').attr('id', 'vis') diff --git a/static/node_modules/sententree/dist/sententree-standalone.js b/static/node_modules/sententree/dist/sententree-standalone.js index ad403e4..00cc1b9 100755 --- a/static/node_modules/sententree/dist/sententree-standalone.js +++ b/static/node_modules/sententree/dist/sententree-standalone.js @@ -19112,6 +19112,7 @@ var word = null; var count = 0; var len = seq.words.length; + var root = seq.newWord ? seq.newWord.word : undefined var _loop = function _loop(s) { var fdist = {}; @@ -19133,6 +19134,8 @@ } }); + console.log(seq) + var maxw = null; var maxc = 0; @@ -19143,20 +19146,65 @@ words.forEach(function(w) { var value = fdist[w]; - //if (value < maxSupport && (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase > maxc) { - //if (value < maxSupport && value > maxc && (stopwords.indexOf(itemset[w]) < 0 || isNotRoot)) { - if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) { - maxw = +w; - //maxc = (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase - maxc = value + var distRatio = 0.1 + if (root) { + var dist = Math.abs(words.indexOf(w) - words.indexOf(root)) + } + switch (mode) { + case 0: + if (!root) { + if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) { + maxw = +w + maxc = value + } + } else { + if (value < maxSupport && (value * Math.pow(distRatio, dist - 1)) > maxc && stopwords.indexOf(itemset[w]) < 0 && dist == 1) { + maxw = +w + maxc = value + console.log(maxc) + } + } + break + + case 1: + if (value < maxSupport && (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase > maxc) { + //if (value < maxSupport && value > maxc && (stopwords.indexOf(itemset[w]) < 0 || isNotRoot)) { + //if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) { + + maxw = +w; + maxc = (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase + //maxc = value + } + break + case 3: + //if (value < maxSupport && (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase > maxc) { + if (value < maxSupport && value > maxc && (stopwords.indexOf(itemset[w]) < 0 || isNotRoot)) { + //if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) { + + maxw = +w; + //maxc = (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase + maxc = value + } + break + case 2: + //if (value < maxSupport && value > maxc && (stopwords.indexOf(itemset[w]) < 0 || isNotRoot)) { + if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) { + + maxw = +w; + //maxc = (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase + maxc = value + } + break + default: + break } }); - console.log(itemset) if (maxc > count) { pos = s; word = maxw; count = maxc; + console.log({ 'word#': maxw, 'word': itemset[maxw], 'root': seq.newWord ? seq.newWord.entity : null, 'count': root ? count : "root" }) } }; @@ -19188,7 +19236,6 @@ } } } - console.log({ 's0': s0, 's1': s1 }) return { word: word, pos: pos, count: count, s0: s0, s1: s1 }; } @@ -19206,12 +19253,10 @@ seqs.push(rootSeq); var leafSeqs = []; - console.log(JSON.parse(JSON.stringify(seqs))) while (!seqs.empty() && expandCnt > 0) { /* find the candidate sequence with largest support DB */ var s = seqs.pop(); - console.log({ 's': s, 'seqs': seqs }) var graph = s.graph; var s0 = s.r; var s1 = s.l; @@ -19219,7 +19264,6 @@ if (!s0 && !s1) { /* find the next frequent sequence */ var result = growSeq(s, terms, minSupport, maxSupport, itemset); - console.log(result) s0 = result.s0; s1 = result.s1; var word = result.word, @@ -19237,6 +19281,7 @@ } var newWord = { id: graph.totalNodeCnt++, + word: word, entity: itemset[word], freq: count, topEntries: s1.DBs.slice(0, 5), @@ -19306,7 +19351,6 @@ var str = words.map(function(w) { return w.entity; }).join(' '); - console.log(str); } var SentenTreeModel = function() { @@ -19336,7 +19380,6 @@ var size = tokenizedData.computeSize(); this.supportRange = [Math.max(size * minSupportRatio, minSupportCount), size * maxSupportRatio]; - console.log(this.supportRange) var _supportRange = _slicedToArray(this.supportRange, 2), minSupport = _supportRange[0], @@ -19386,11 +19429,9 @@ key: 'getRenderedGraphs', value: function getRenderedGraphs(limit) { var graphs = arguments.length === 1 ? this.graphs.slice(0, limit) : this.graphs; - console.log("slice") var renderedGraphs = graphs.map(function(g) { return g.toRenderedGraph(); }); - console.log("toRenderedGraph") var globalFreqRange = [(0, _lodash.min)(renderedGraphs.map(function(g) { return g.freqRange[0]; })), (0, _lodash.max)(renderedGraphs.map(function(g) { @@ -37869,15 +37910,12 @@ let counter = 1; while (heap.size() > 0) { - console.log(`in while ${counter++}`) var parent = heap.pop(); - console.log(heap) if (parent.merged) { continue; } var groups = []; - console.log(parent.data.id) if (parent.leftLinks.length > 1) { var lNodes = parent.leftLinks.map(function(l) { return l.source; @@ -37891,7 +37929,6 @@ }); groups = groups.concat(this.groupMergeableNodes(rNodes)); } - console.log(groups) if (groups.length > 0) { var newNodes = groups.map(function(group) { @@ -38119,7 +38156,6 @@ var RenderedGraph = function() { function RenderedGraph(rawGraph) { - console.log(arguments) var _ref = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}, _ref$bundle = _ref.bundle, bundle = _ref$bundle === undefined ? true : _ref$bundle, @@ -38152,10 +38188,8 @@ this.assignNodeIds(nodes); - console.log(bundle) if (bundle) { var bundled = new _GraphBundler2.default(nodes, links).bundle(); - console.log(bundled) this.nodes = bundled.nodes; this.links = bundled.links; this.assignNodeIds(this.nodes); @@ -43071,11 +43105,10 @@ var l3 = l * l * l; var hs = 2 * -weight / (D2 * l3); if (!isFinite(gs)) - console.log(gs); - for (i = 0; i < this.k; ++i) { - this.g[i][u] += d[i] * gs; - Huu[i] -= this.H[i][u][v] = hs * (l3 + D * (d2[i] - sd2) + l * sd2); - } + for (i = 0; i < this.k; ++i) { + this.g[i][u] += d[i] * gs; + Huu[i] -= this.H[i][u][v] = hs * (l3 + D * (d2[i] - sd2) + l * sd2); + } } for (i = 0; i < this.k; ++i) maxH = Math.max(maxH, this.H[i][u][u] = Huu[i]); diff --git a/templates/ptt.html b/templates/ptt.html index 23840cb..1e31493 100755 --- a/templates/ptt.html +++ b/templates/ptt.html @@ -80,7 +80,8 @@ - + +
@@ -95,6 +96,12 @@ 其他詞性
+
+ + + + +
diff --git a/templates/ptt_push.html b/templates/ptt_push.html index 94de469..3639d19 100644 --- a/templates/ptt_push.html +++ b/templates/ptt_push.html @@ -75,6 +75,7 @@ +
@@ -89,6 +90,12 @@ 其他詞性
+
+ + + + +