diff --git a/app.py b/app.py index fd3489b..392c94f 100755 --- a/app.py +++ b/app.py @@ -67,7 +67,7 @@ def ptt_push(): @app.route('/ptt_push/init', methods=['POST']) def pttPushInit(): - author = next(pttPush.getMostFrequentAuthor()) + author = 'a58461351' pushes = pttPush.findAuthorPush( author=[author], stopwords=pttPush.defaultStopWords) result: dict = { diff --git a/dataHandlerPTT.py b/dataHandlerPTT.py index 3c0a5d4..0b62c86 100755 --- a/dataHandlerPTT.py +++ b/dataHandlerPTT.py @@ -31,6 +31,8 @@ with open('/home/vis/pttDatabase/PTTData/Gossiping/content/content.pck', 'rb') a defaultStopWords = [] data = PTTData('Gossiping', '/home/vis/pttDatabase/PTTData') +sentence_length = 100 +use_push_count = False with open('resource/stopWords.txt', 'r', encoding='UTF-8') as file: for word in file.readlines(): @@ -75,7 +77,7 @@ def contentProcess(content, text): result = [] for i in cutted: result.append(i) - if (len(result) >= 50): + if (len(result) >= sentence_length): sentenses.append(result.copy()) result = [] if (result != []): @@ -324,7 +326,7 @@ def getDefault(startDate, endDate): 'date': datetime.strptime(i['date'], '%Y%m%d%H%M%S').strftime("%a %b %d %H:%M:%S %Y"), 'part': i['content'][max(0, cut[0] - 20): min(len(i['content']), cut[1])].replace('\n', '') } - result.append([len(result), seq, 1000 + i['pushes']]) + result.append([len(result), seq, (1000 + i['pushes'])if use_push_count else 3000]) fileString = io.StringIO() writer = csv.writer(fileString, delimiter='\t') diff --git a/resource/stopWords.txt b/resource/stopWords.txt index 9aef6c5..4b61f5a 100755 --- a/resource/stopWords.txt +++ b/resource/stopWords.txt @@ -67,4 +67,9 @@ 那 只 所以 -講 \ No newline at end of file +講 +記者 +看 +者 +媒體 +和 \ No newline at end of file diff --git a/static/css/main.css b/static/css/main.css index 3480344..b38b365 100755 --- a/static/css/main.css +++ b/static/css/main.css @@ -112,6 +112,24 @@ html { transition-duration: 0.5s; } +#pttPageWindowContent { + display: block; + position: fixed; + background-color: #FFF; + left: 50%; + top: 50%; + transform: translate(-50%, -50%); + width: 90%; + height: 90%; + border: lightgray; + border-width: 1px; + border-style: solid; + border-radius: 20px; + padding: 26px; + z-index: 99; + box-shadow: 0px 5px 20px rgba(0, 0, 0, .3); +} + .deleteListElement { position: absolute; right: 0; @@ -141,6 +159,18 @@ li a { } .info { + background-color: rgba(255, 255, 255, 0.6); + animation: fadeIn 0.2s; + animation-fill-mode: forwards; + position: fixed; + display: flex; + top: 0; + width: 100%; + height: 100%; + z-index: 90; +} + +.info99 { background-color: rgba(255, 255, 255, 0.6); animation: fadeIn 0.2s; animation-fill-mode: forwards; @@ -189,7 +219,7 @@ li a { height: auto; border-radius: 15px; padding: 10px 15px; - z-index: 99; + z-index: 90; align-content: center; } diff --git a/static/js/loadIdfTable.js b/static/js/loadIdfTable.js index 6783e11..aed7f77 100644 --- a/static/js/loadIdfTable.js +++ b/static/js/loadIdfTable.js @@ -1,6 +1,7 @@ var idfTable var idfTableOrig var idfBase = 1000 +var mode = 0 $.ajax({ url: '/resource/idfTable.json', async: false, diff --git a/static/js/ptt.js b/static/js/ptt.js index 7e34125..235a60d 100755 --- a/static/js/ptt.js +++ b/static/js/ptt.js @@ -76,6 +76,41 @@ function init() { hideIdfEditor() } }) + $('#pttPageWindow').click(function(e) { + if ($('#pttPageWindow').is(e.target)) { + hidePTTPage() + } + }) + changeMode(0) + destroyCurrentGraph() + buildSentetree() +} + +function loadTemplate(num) { + templates = [{ + startDate: '2020-12-01', + endDate: '2020-12-31', + keyword: '', + mode: 1 + }, + { + startDate: '2020-01-01', + endDate: '2020-03-01', + keyword: '衛生紙', + mode: 2 + }, + { + startDate: '2020-01-11', + endDate: '2020-01-12', + keyword: '', + mode: 2 + } + ] + chosenTemp = templates[num] + setDate(chosenTemp.startDate, chosenTemp.endDate) + $('#keywordBox').val(chosenTemp.keyword) + changeMode(chosenTemp.mode) + sendRequest() } function clearStopWord() { @@ -107,6 +142,17 @@ function addStopWord() { $('#newStopWord').val('') } +function changeMode(_mode) { + for (i = 0; i < 4; i++) { + if (i == _mode) { + $('#modeSelector button').eq(i).css("color", "#aaa") + } else { + $('#modeSelector button').eq(i).css("color", "#000") + } + } + mode = _mode +} + function scrollIdfList() { let targetWord = $('#idfTarget').val() let wordList = $("#ieContainer").find("tr").slice(1).map((_, a) => { return $($(a).find("td")[0]).html() }).get() @@ -156,8 +202,12 @@ function showIdfEditor() { .append($('') .attr('style', 'position: sticky; top: 0; background: white;') .append('單詞')) - .append($('')) - .append($('').attr('class', 'w3-right-align') + .append($('') + .attr('class', 'w3-center-align') + .attr('style', 'position: sticky; top: 0; background: white;') + .append('操作')) + .append($('') + .attr('class', 'w3-right-align') .attr('style', 'position: sticky; top: 0; background: white;') .append('單詞頻率') ) @@ -245,6 +295,15 @@ function hideIdfEditor() { $('#idfEditorLayer').addClass('hidden') } +function showPTTPage(url) { + $('#pttPageWindowContent iframe').attr('src', url) + $('#pttPageWindow').removeClass('hidden') +} + +function hidePTTPage() { + $('#pttPageWindow').addClass('hidden') +} + function updateIdfTable() { let wordList = $("#ieContainer").find("tr").slice(1).map((_, a) => { return [ @@ -301,6 +360,23 @@ function closeEventListner() { } function sendRequest() { + content = getContent() + startDate = $('#startDate').val() + endDate = $('#endDate').val() + console.log(content) + $.ajax({ + type: 'POST', + url: '/addRequest', + data: content, + contentType: 'application/json', + success: function(data) { + console.log(data) + changeGraph(data.Result) + } + }) +} + +function getContent() { content = JSON.stringify({ startDate: $('#startDate').val(), endDate: $('#endDate').val(), @@ -316,19 +392,7 @@ function sendRequest() { other: $('#other').is(':checked') } }) - startDate = $('#startDate').val() - endDate = $('#endDate').val() - console.log(content) - $.ajax({ - type: 'POST', - url: '/addRequest', - data: content, - contentType: 'application/json', - success: function(data) { - console.log(data) - changeGraph(data.Result) - } - }) + return content } function changeGraph(data) { @@ -386,7 +450,8 @@ function buildSentetree() { gapBetweenGraph: 10 }); console.log(tree) - tree.data(model.getRenderedGraphs(2)) + let nGraph = globKeyword == "" ? 5 : 2 + tree.data(model.getRenderedGraphs(nGraph)) .on('nodeClick', node => { if ('mergedData' in node.data) { seqList = node.data.mergedData.map((d) => { @@ -451,8 +516,9 @@ function buildSentetree() { $('#titleListContainer').empty() for (i of titleList) { $('#titleListContainer').append( - $('
  • ').attr('class', 'w3-panel').append( - $('').attr('href', i.url).attr('target', '_blank').append( + $('
  • ').attr('class', 'w3-panel') + .css('cursor', 'pointer').append( + $('

    ').attr('target', '_blank').append( $('

    ').html(i.title) ).append( $('').attr('style', 'margin: 0px 10px').html(i.author) @@ -461,7 +527,10 @@ function buildSentetree() { ).append( $('').attr('style', 'margin: 0px 10px').html('推文數:' + i.pushes) ) - ) + ).click(function() { + let indx = $(this).index() + showPTTPage((titleList[indx].url).replace('www.ptt.cc', 'www.pttweb.cc')) + }) ) } }) diff --git a/static/js/pttPush.js b/static/js/pttPush.js index 65c4b72..b5922dd 100644 --- a/static/js/pttPush.js +++ b/static/js/pttPush.js @@ -1,6 +1,7 @@ init() var tsvPath = '' var titlePath = '' +var tsvString var defaultStartDate var defaultEndDate var totalPosts @@ -24,6 +25,7 @@ function init() { console.log(wordPushList) $('#idBox').val(data.Result.author) buildSentetree(tsvString) + changeMode(2) } }) $(document).ready(function() { @@ -65,6 +67,31 @@ function init() { }) } +function loadTemplate(num) { + templates = [{ + userId: '', + aid: '1Vv4iFY6', + keyword: '', + mode: 0, + }, { + userId: '', + aid: '1VyJ2vP_', + keyword: '', + mode: 0 + }, { + userId: 'xetherz3', + aid: '', + keyword: '', + mode: 0 + }] + let chosenTemplate = templates[num] + $('#idBox').val(chosenTemplate.userId) + $('#titleBox').val(chosenTemplate.aid) + $('#keywordBox').val(chosenTemplate.keyword) + changeMode(0) + sendRequest() +} + function clearStopWord() { stopwords = [] $('#sweContainer').html('') @@ -111,6 +138,20 @@ function addStopWord() { $('#newStopWord').val('') } +function changeMode(_mode) { + for (i = 0; i < 4; i++) { + if (i == _mode) { + $('#modeSelector button').eq(i).css("color", "#aaa") + } else { + $('#modeSelector button').eq(i).css("color", "#000") + } + } + mode = _mode + destroyCurrentGraph() + d3.select('#graph').append('div').attr('id', 'vis') + buildSentetree(tsvString) +} + function showIdfEditor() { $(window).unbind('keydown') $(window).keydown(function(event) { @@ -123,7 +164,10 @@ function showIdfEditor() { .append($('') .attr('style', 'position: sticky; top: 0; background: white;') .append('單詞')) - .append($('')) + .append($('') + .attr('class', 'w3-center-align') + .attr('style', 'position: sticky; top: 0; background: white;') + .append('操作')) .append($('').attr('class', 'w3-right-align') .attr('style', 'position: sticky; top: 0; background: white;') .append('單詞頻率') @@ -190,6 +234,17 @@ function showIdfEditor() { $('#idfEditorLayer').removeClass('hidden') } +function hideIdfEditor() { + $(window).unbind('keydown') + $(window).keydown(function(event) { + if (event.keyCode == 13) { + event.preventDefault() + sendRequest() + } + }) + $('#idfEditorLayer').addClass('hidden') +} + function showStopwordEditor() { console.log(stopwords) $(window).unbind('keydown') @@ -296,7 +351,7 @@ function sendRequest() { function changeGraph(data) { console.log(data) - let tsvString = data.tsv + tsvString = data.tsv let json = JSON.parse(data.json) destroyCurrentGraph() d3.select('#graph').append('div').attr('id', 'vis') diff --git a/static/node_modules/sententree/dist/sententree-standalone.js b/static/node_modules/sententree/dist/sententree-standalone.js index ad403e4..33b4503 100755 --- a/static/node_modules/sententree/dist/sententree-standalone.js +++ b/static/node_modules/sententree/dist/sententree-standalone.js @@ -19112,6 +19112,7 @@ var word = null; var count = 0; var len = seq.words.length; + var root = seq.newWord ? seq.newWord.word : undefined var _loop = function _loop(s) { var fdist = {}; @@ -19133,6 +19134,8 @@ } }); + console.log(seq) + var maxw = null; var maxc = 0; @@ -19143,20 +19146,65 @@ words.forEach(function(w) { var value = fdist[w]; - //if (value < maxSupport && (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase > maxc) { - //if (value < maxSupport && value > maxc && (stopwords.indexOf(itemset[w]) < 0 || isNotRoot)) { - if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) { - maxw = +w; - //maxc = (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase - maxc = value + var distRatio = 0.1 + if (root) { + var dist = Math.abs(words.indexOf(w) - words.indexOf(root)) + } + switch (mode) { + case 0: + if (!root) { + if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) { + maxw = +w + maxc = value + } + } else { + if (value < maxSupport && value > maxc && dist < 2 && stopwords.indexOf(itemset[w]) < 0 && dist == 1) { + maxw = +w + maxc = value + console.log(maxc) + } + } + break + + case 1: + if (value < maxSupport && (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase > maxc) { + //if (value < maxSupport && value > maxc && (stopwords.indexOf(itemset[w]) < 0 || isNotRoot)) { + //if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) { + + maxw = +w; + maxc = (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase + //maxc = value + } + break + case 3: + //if (value < maxSupport && (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase > maxc) { + if (value < maxSupport && value > maxc && (stopwords.indexOf(itemset[w]) < 0 || isNotRoot)) { + //if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) { + + maxw = +w; + //maxc = (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase + maxc = value + } + break + case 2: + //if (value < maxSupport && value > maxc && (stopwords.indexOf(itemset[w]) < 0 || isNotRoot)) { + if (value < maxSupport && value > maxc && stopwords.indexOf(itemset[w]) < 0) { + + maxw = +w; + //maxc = (value / Math.max(idfTable[itemset[w]], idfBase)) * idfBase + maxc = value + } + break + default: + break } }); - console.log(itemset) if (maxc > count) { pos = s; word = maxw; count = maxc; + console.log({ 'word#': maxw, 'word': itemset[maxw], 'root': seq.newWord ? seq.newWord.entity : null, 'count': root ? count : "root" }) } }; @@ -19188,7 +19236,6 @@ } } } - console.log({ 's0': s0, 's1': s1 }) return { word: word, pos: pos, count: count, s0: s0, s1: s1 }; } @@ -19206,12 +19253,10 @@ seqs.push(rootSeq); var leafSeqs = []; - console.log(JSON.parse(JSON.stringify(seqs))) while (!seqs.empty() && expandCnt > 0) { /* find the candidate sequence with largest support DB */ var s = seqs.pop(); - console.log({ 's': s, 'seqs': seqs }) var graph = s.graph; var s0 = s.r; var s1 = s.l; @@ -19219,7 +19264,6 @@ if (!s0 && !s1) { /* find the next frequent sequence */ var result = growSeq(s, terms, minSupport, maxSupport, itemset); - console.log(result) s0 = result.s0; s1 = result.s1; var word = result.word, @@ -19237,6 +19281,7 @@ } var newWord = { id: graph.totalNodeCnt++, + word: word, entity: itemset[word], freq: count, topEntries: s1.DBs.slice(0, 5), @@ -19306,7 +19351,6 @@ var str = words.map(function(w) { return w.entity; }).join(' '); - console.log(str); } var SentenTreeModel = function() { @@ -19336,7 +19380,6 @@ var size = tokenizedData.computeSize(); this.supportRange = [Math.max(size * minSupportRatio, minSupportCount), size * maxSupportRatio]; - console.log(this.supportRange) var _supportRange = _slicedToArray(this.supportRange, 2), minSupport = _supportRange[0], @@ -19386,11 +19429,9 @@ key: 'getRenderedGraphs', value: function getRenderedGraphs(limit) { var graphs = arguments.length === 1 ? this.graphs.slice(0, limit) : this.graphs; - console.log("slice") var renderedGraphs = graphs.map(function(g) { return g.toRenderedGraph(); }); - console.log("toRenderedGraph") var globalFreqRange = [(0, _lodash.min)(renderedGraphs.map(function(g) { return g.freqRange[0]; })), (0, _lodash.max)(renderedGraphs.map(function(g) { @@ -37869,15 +37910,12 @@ let counter = 1; while (heap.size() > 0) { - console.log(`in while ${counter++}`) var parent = heap.pop(); - console.log(heap) if (parent.merged) { continue; } var groups = []; - console.log(parent.data.id) if (parent.leftLinks.length > 1) { var lNodes = parent.leftLinks.map(function(l) { return l.source; @@ -37891,7 +37929,6 @@ }); groups = groups.concat(this.groupMergeableNodes(rNodes)); } - console.log(groups) if (groups.length > 0) { var newNodes = groups.map(function(group) { @@ -38119,7 +38156,6 @@ var RenderedGraph = function() { function RenderedGraph(rawGraph) { - console.log(arguments) var _ref = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}, _ref$bundle = _ref.bundle, bundle = _ref$bundle === undefined ? true : _ref$bundle, @@ -38152,10 +38188,8 @@ this.assignNodeIds(nodes); - console.log(bundle) if (bundle) { var bundled = new _GraphBundler2.default(nodes, links).bundle(); - console.log(bundled) this.nodes = bundled.nodes; this.links = bundled.links; this.assignNodeIds(this.nodes); @@ -43071,11 +43105,10 @@ var l3 = l * l * l; var hs = 2 * -weight / (D2 * l3); if (!isFinite(gs)) - console.log(gs); - for (i = 0; i < this.k; ++i) { - this.g[i][u] += d[i] * gs; - Huu[i] -= this.H[i][u][v] = hs * (l3 + D * (d2[i] - sd2) + l * sd2); - } + for (i = 0; i < this.k; ++i) { + this.g[i][u] += d[i] * gs; + Huu[i] -= this.H[i][u][v] = hs * (l3 + D * (d2[i] - sd2) + l * sd2); + } } for (i = 0; i < this.k; ++i) maxH = Math.max(maxH, this.H[i][u][u] = Huu[i]); diff --git a/templates/generalTxt.html b/templates/generalTxt.html index 1915636..7c65940 100644 --- a/templates/generalTxt.html +++ b/templates/generalTxt.html @@ -41,7 +41,8 @@
    - +
    diff --git a/templates/ptt.html b/templates/ptt.html index 23840cb..78a09f6 100755 --- a/templates/ptt.html +++ b/templates/ptt.html @@ -15,6 +15,11 @@
    + @@ -66,21 +71,24 @@

    {{title}}

    SentenTree https://github.com/twitter/SentenTree

    -

    同時使用關鍵詞和詞性搜尋的時候,必須選擇所設關鍵詞本身的詞性,否則會搜尋不到結果。

    -

    點選圖上的單詞可以查看單詞的資訊,觀看原始文章,也會快速切換關鍵字。

    -

    若搜尋到的文章超過50篇,圖表僅會顯示推文數最多的前50篇文章。

    -

    停用詞的處理改為將不直接忽略停用詞,但是停用詞不會被設為主要單詞,並且大小會比其他單詞更小。

    +

    可直接從下方範例搜尋條件中選擇

    +
    + + + +
    搜尋日期範圍 從 - +
    - + +
    @@ -95,6 +103,12 @@ 其他詞性
    +
    + + + + +
    diff --git a/templates/ptt_push.html b/templates/ptt_push.html index 94de469..73de566 100644 --- a/templates/ptt_push.html +++ b/templates/ptt_push.html @@ -67,6 +67,12 @@

    SentenTree https://github.com/twitter/SentenTree

    同時使用關鍵詞和詞性搜尋的時候,必須選擇所設關鍵詞本身的詞性,否則會搜尋不到結果。

    鄉民ID和文章aid之中必須至少設定一個搜尋條件。

    +

    可直接從下方範例搜尋條件中選擇

    +
    + + + +
    @@ -75,6 +81,7 @@ +
    @@ -89,6 +96,12 @@ 其他詞性
    +
    + + + + +